diff options
31 files changed, 743 insertions, 477 deletions
diff --git a/.gitmodules b/.gitmodules index 2558a5ebc..26b4e5272 100644 --- a/.gitmodules +++ b/.gitmodules | |||
| @@ -40,3 +40,6 @@ | |||
| 40 | [submodule "Vulkan-Headers"] | 40 | [submodule "Vulkan-Headers"] |
| 41 | path = externals/Vulkan-Headers | 41 | path = externals/Vulkan-Headers |
| 42 | url = https://github.com/KhronosGroup/Vulkan-Headers.git | 42 | url = https://github.com/KhronosGroup/Vulkan-Headers.git |
| 43 | [submodule "externals/zstd"] | ||
| 44 | path = externals/zstd | ||
| 45 | url = https://github.com/facebook/zstd | ||
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index e156bbece..aa3319eb1 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt | |||
| @@ -49,6 +49,10 @@ add_subdirectory(open_source_archives EXCLUDE_FROM_ALL) | |||
| 49 | add_library(unicorn-headers INTERFACE) | 49 | add_library(unicorn-headers INTERFACE) |
| 50 | target_include_directories(unicorn-headers INTERFACE ./unicorn/include) | 50 | target_include_directories(unicorn-headers INTERFACE ./unicorn/include) |
| 51 | 51 | ||
| 52 | # Zstandard | ||
| 53 | add_subdirectory(zstd/build/cmake EXCLUDE_FROM_ALL) | ||
| 54 | target_include_directories(libzstd_static INTERFACE ./zstd/lib) | ||
| 55 | |||
| 52 | # SoundTouch | 56 | # SoundTouch |
| 53 | add_subdirectory(soundtouch) | 57 | add_subdirectory(soundtouch) |
| 54 | 58 | ||
diff --git a/externals/zstd b/externals/zstd new file mode 160000 | |||
| Subproject 470344d33e1d52a2ada75d278466da8d4ee2faf | |||
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 5639021d3..1e8e1b215 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -125,6 +125,8 @@ add_library(common STATIC | |||
| 125 | uint128.h | 125 | uint128.h |
| 126 | vector_math.h | 126 | vector_math.h |
| 127 | web_result.h | 127 | web_result.h |
| 128 | zstd_compression.cpp | ||
| 129 | zstd_compression.h | ||
| 128 | ) | 130 | ) |
| 129 | 131 | ||
| 130 | if(ARCHITECTURE_x86_64) | 132 | if(ARCHITECTURE_x86_64) |
| @@ -138,4 +140,4 @@ endif() | |||
| 138 | create_target_directory_groups(common) | 140 | create_target_directory_groups(common) |
| 139 | 141 | ||
| 140 | target_link_libraries(common PUBLIC Boost::boost fmt microprofile) | 142 | target_link_libraries(common PUBLIC Boost::boost fmt microprofile) |
| 141 | target_link_libraries(common PRIVATE lz4_static) | 143 | target_link_libraries(common PRIVATE lz4_static libzstd_static) |
diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp new file mode 100644 index 000000000..60a35c67c --- /dev/null +++ b/src/common/zstd_compression.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <zstd.h> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/zstd_compression.h" | ||
| 12 | |||
| 13 | namespace Common::Compression { | ||
| 14 | |||
| 15 | std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level) { | ||
| 16 | compression_level = std::clamp(compression_level, 1, ZSTD_maxCLevel()); | ||
| 17 | |||
| 18 | const std::size_t max_compressed_size = ZSTD_compressBound(source_size); | ||
| 19 | std::vector<u8> compressed(max_compressed_size); | ||
| 20 | |||
| 21 | const std::size_t compressed_size = | ||
| 22 | ZSTD_compress(compressed.data(), compressed.size(), source, source_size, compression_level); | ||
| 23 | |||
| 24 | if (ZSTD_isError(compressed_size)) { | ||
| 25 | // Compression failed | ||
| 26 | return {}; | ||
| 27 | } | ||
| 28 | |||
| 29 | compressed.resize(compressed_size); | ||
| 30 | |||
| 31 | return compressed; | ||
| 32 | } | ||
| 33 | |||
| 34 | std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size) { | ||
| 35 | return CompressDataZSTD(source, source_size, ZSTD_CLEVEL_DEFAULT); | ||
| 36 | } | ||
| 37 | |||
| 38 | std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed) { | ||
| 39 | const std::size_t decompressed_size = | ||
| 40 | ZSTD_getDecompressedSize(compressed.data(), compressed.size()); | ||
| 41 | std::vector<u8> decompressed(decompressed_size); | ||
| 42 | |||
| 43 | const std::size_t uncompressed_result_size = ZSTD_decompress( | ||
| 44 | decompressed.data(), decompressed.size(), compressed.data(), compressed.size()); | ||
| 45 | |||
| 46 | if (decompressed_size != uncompressed_result_size || ZSTD_isError(uncompressed_result_size)) { | ||
| 47 | // Decompression failed | ||
| 48 | return {}; | ||
| 49 | } | ||
| 50 | return decompressed; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace Common::Compression | ||
diff --git a/src/common/zstd_compression.h b/src/common/zstd_compression.h new file mode 100644 index 000000000..e0a64b035 --- /dev/null +++ b/src/common/zstd_compression.h | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <vector> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Common::Compression { | ||
| 10 | |||
| 11 | /** | ||
| 12 | * Compresses a source memory region with Zstandard and returns the compressed data in a vector. | ||
| 13 | * | ||
| 14 | * @param source the uncompressed source memory region. | ||
| 15 | * @param source_size the size in bytes of the uncompressed source memory region. | ||
| 16 | * @param compression_level the used compression level. Should be between 1 and 22. | ||
| 17 | * | ||
| 18 | * @return the compressed data. | ||
| 19 | */ | ||
| 20 | std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level); | ||
| 21 | |||
| 22 | /** | ||
| 23 | * Compresses a source memory region with Zstandard with the default compression level and returns | ||
| 24 | * the compressed data in a vector. | ||
| 25 | * | ||
| 26 | * @param source the uncompressed source memory region. | ||
| 27 | * @param source_size the size in bytes of the uncompressed source memory region. | ||
| 28 | * | ||
| 29 | * @return the compressed data. | ||
| 30 | */ | ||
| 31 | std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size); | ||
| 32 | |||
| 33 | /** | ||
| 34 | * Decompresses a source memory region with Zstandard and returns the uncompressed data in a vector. | ||
| 35 | * | ||
| 36 | * @param compressed the compressed source memory region. | ||
| 37 | * | ||
| 38 | * @return the decompressed data. | ||
| 39 | */ | ||
| 40 | std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed); | ||
| 41 | |||
| 42 | } // namespace Common::Compression \ No newline at end of file | ||
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index f64e4c6a6..49145911b 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp | |||
| @@ -163,7 +163,6 @@ MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64) | |||
| 163 | 163 | ||
| 164 | void ARM_Dynarmic::Run() { | 164 | void ARM_Dynarmic::Run() { |
| 165 | MICROPROFILE_SCOPE(ARM_Jit_Dynarmic); | 165 | MICROPROFILE_SCOPE(ARM_Jit_Dynarmic); |
| 166 | ASSERT(Memory::GetCurrentPageTable() == current_page_table); | ||
| 167 | 166 | ||
| 168 | jit->Run(); | 167 | jit->Run(); |
| 169 | } | 168 | } |
| @@ -278,7 +277,6 @@ void ARM_Dynarmic::ClearExclusiveState() { | |||
| 278 | 277 | ||
| 279 | void ARM_Dynarmic::PageTableChanged() { | 278 | void ARM_Dynarmic::PageTableChanged() { |
| 280 | jit = MakeJit(); | 279 | jit = MakeJit(); |
| 281 | current_page_table = Memory::GetCurrentPageTable(); | ||
| 282 | } | 280 | } |
| 283 | 281 | ||
| 284 | DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {} | 282 | DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {} |
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index 81e0b4ac0..d867c2a50 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h | |||
| @@ -12,10 +12,6 @@ | |||
| 12 | #include "core/arm/exclusive_monitor.h" | 12 | #include "core/arm/exclusive_monitor.h" |
| 13 | #include "core/arm/unicorn/arm_unicorn.h" | 13 | #include "core/arm/unicorn/arm_unicorn.h" |
| 14 | 14 | ||
| 15 | namespace Common { | ||
| 16 | struct PageTable; | ||
| 17 | } | ||
| 18 | |||
| 19 | namespace Core::Timing { | 15 | namespace Core::Timing { |
| 20 | class CoreTiming; | 16 | class CoreTiming; |
| 21 | } | 17 | } |
| @@ -69,8 +65,6 @@ private: | |||
| 69 | std::size_t core_index; | 65 | std::size_t core_index; |
| 70 | Timing::CoreTiming& core_timing; | 66 | Timing::CoreTiming& core_timing; |
| 71 | DynarmicExclusiveMonitor& exclusive_monitor; | 67 | DynarmicExclusiveMonitor& exclusive_monitor; |
| 72 | |||
| 73 | Common::PageTable* current_page_table = nullptr; | ||
| 74 | }; | 68 | }; |
| 75 | 69 | ||
| 76 | class DynarmicExclusiveMonitor final : public ExclusiveMonitor { | 70 | class DynarmicExclusiveMonitor final : public ExclusiveMonitor { |
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h index 68406eb63..ac0e1d796 100644 --- a/src/core/hle/ipc_helpers.h +++ b/src/core/hle/ipc_helpers.h | |||
| @@ -139,10 +139,8 @@ public: | |||
| 139 | context->AddDomainObject(std::move(iface)); | 139 | context->AddDomainObject(std::move(iface)); |
| 140 | } else { | 140 | } else { |
| 141 | auto& kernel = Core::System::GetInstance().Kernel(); | 141 | auto& kernel = Core::System::GetInstance().Kernel(); |
| 142 | auto sessions = | 142 | auto [server, client] = |
| 143 | Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName()); | 143 | Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName()); |
| 144 | auto server = std::get<Kernel::SharedPtr<Kernel::ServerSession>>(sessions); | ||
| 145 | auto client = std::get<Kernel::SharedPtr<Kernel::ClientSession>>(sessions); | ||
| 146 | iface->ClientConnected(server); | 144 | iface->ClientConnected(server); |
| 147 | context->AddMoveObject(std::move(client)); | 145 | context->AddMoveObject(std::move(client)); |
| 148 | } | 146 | } |
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp index aa432658e..744b1697d 100644 --- a/src/core/hle/kernel/client_port.cpp +++ b/src/core/hle/kernel/client_port.cpp | |||
| @@ -2,8 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "core/hle/kernel/client_port.h" | 5 | #include "core/hle/kernel/client_port.h" |
| 8 | #include "core/hle/kernel/client_session.h" | 6 | #include "core/hle/kernel/client_session.h" |
| 9 | #include "core/hle/kernel/errors.h" | 7 | #include "core/hle/kernel/errors.h" |
| @@ -31,18 +29,18 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() { | |||
| 31 | active_sessions++; | 29 | active_sessions++; |
| 32 | 30 | ||
| 33 | // Create a new session pair, let the created sessions inherit the parent port's HLE handler. | 31 | // Create a new session pair, let the created sessions inherit the parent port's HLE handler. |
| 34 | auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this); | 32 | auto [server, client] = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this); |
| 35 | 33 | ||
| 36 | if (server_port->HasHLEHandler()) { | 34 | if (server_port->HasHLEHandler()) { |
| 37 | server_port->GetHLEHandler()->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions)); | 35 | server_port->GetHLEHandler()->ClientConnected(server); |
| 38 | } else { | 36 | } else { |
| 39 | server_port->AppendPendingSession(std::get<SharedPtr<ServerSession>>(sessions)); | 37 | server_port->AppendPendingSession(server); |
| 40 | } | 38 | } |
| 41 | 39 | ||
| 42 | // Wake the threads waiting on the ServerPort | 40 | // Wake the threads waiting on the ServerPort |
| 43 | server_port->WakeupAllWaitingThreads(); | 41 | server_port->WakeupAllWaitingThreads(); |
| 44 | 42 | ||
| 45 | return MakeResult(std::get<SharedPtr<ClientSession>>(sessions)); | 43 | return MakeResult(client); |
| 46 | } | 44 | } |
| 47 | 45 | ||
| 48 | void ClientPort::ConnectionClosed() { | 46 | void ClientPort::ConnectionClosed() { |
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 3f14bfa86..4d58e7c69 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "core/hle/kernel/thread.h" | 21 | #include "core/hle/kernel/thread.h" |
| 22 | #include "core/hle/lock.h" | 22 | #include "core/hle/lock.h" |
| 23 | #include "core/hle/result.h" | 23 | #include "core/hle/result.h" |
| 24 | #include "core/memory.h" | ||
| 24 | 25 | ||
| 25 | namespace Kernel { | 26 | namespace Kernel { |
| 26 | 27 | ||
| @@ -181,6 +182,7 @@ void KernelCore::AppendNewProcess(SharedPtr<Process> process) { | |||
| 181 | 182 | ||
| 182 | void KernelCore::MakeCurrentProcess(Process* process) { | 183 | void KernelCore::MakeCurrentProcess(Process* process) { |
| 183 | impl->current_process = process; | 184 | impl->current_process = process; |
| 185 | Memory::SetCurrentPageTable(&process->VMManager().page_table); | ||
| 184 | } | 186 | } |
| 185 | 187 | ||
| 186 | Process* KernelCore::CurrentProcess() { | 188 | Process* KernelCore::CurrentProcess() { |
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 041267318..26c6b95ab 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -32,9 +32,6 @@ namespace { | |||
| 32 | * @param priority The priority to give the main thread | 32 | * @param priority The priority to give the main thread |
| 33 | */ | 33 | */ |
| 34 | void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) { | 34 | void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) { |
| 35 | // Setup page table so we can write to memory | ||
| 36 | Memory::SetCurrentPageTable(&owner_process.VMManager().page_table); | ||
| 37 | |||
| 38 | // Initialize new "main" thread | 35 | // Initialize new "main" thread |
| 39 | const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress(); | 36 | const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress(); |
| 40 | auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, | 37 | auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, |
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index ac501bf7f..e8447b69a 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp | |||
| @@ -101,7 +101,6 @@ void Scheduler::SwitchContext(Thread* new_thread) { | |||
| 101 | auto* const thread_owner_process = current_thread->GetOwnerProcess(); | 101 | auto* const thread_owner_process = current_thread->GetOwnerProcess(); |
| 102 | if (previous_process != thread_owner_process) { | 102 | if (previous_process != thread_owner_process) { |
| 103 | system.Kernel().MakeCurrentProcess(thread_owner_process); | 103 | system.Kernel().MakeCurrentProcess(thread_owner_process); |
| 104 | Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table); | ||
| 105 | } | 104 | } |
| 106 | 105 | ||
| 107 | cpu_core.LoadContext(new_thread->GetContext()); | 106 | cpu_core.LoadContext(new_thread->GetContext()); |
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp index 708fdf9e1..02e7c60e6 100644 --- a/src/core/hle/kernel/server_port.cpp +++ b/src/core/hle/kernel/server_port.cpp | |||
| @@ -39,9 +39,8 @@ void ServerPort::Acquire(Thread* thread) { | |||
| 39 | ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); | 39 | ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortPair( | 42 | ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions, |
| 43 | KernelCore& kernel, u32 max_sessions, std::string name) { | 43 | std::string name) { |
| 44 | |||
| 45 | SharedPtr<ServerPort> server_port(new ServerPort(kernel)); | 44 | SharedPtr<ServerPort> server_port(new ServerPort(kernel)); |
| 46 | SharedPtr<ClientPort> client_port(new ClientPort(kernel)); | 45 | SharedPtr<ClientPort> client_port(new ClientPort(kernel)); |
| 47 | 46 | ||
| @@ -51,7 +50,7 @@ std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortP | |||
| 51 | client_port->max_sessions = max_sessions; | 50 | client_port->max_sessions = max_sessions; |
| 52 | client_port->active_sessions = 0; | 51 | client_port->active_sessions = 0; |
| 53 | 52 | ||
| 54 | return std::make_tuple(std::move(server_port), std::move(client_port)); | 53 | return std::make_pair(std::move(server_port), std::move(client_port)); |
| 55 | } | 54 | } |
| 56 | 55 | ||
| 57 | } // namespace Kernel | 56 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h index 76293cb8b..fef573b71 100644 --- a/src/core/hle/kernel/server_port.h +++ b/src/core/hle/kernel/server_port.h | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include <tuple> | 9 | #include <utility> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "core/hle/kernel/object.h" | 12 | #include "core/hle/kernel/object.h" |
| @@ -23,6 +23,7 @@ class SessionRequestHandler; | |||
| 23 | class ServerPort final : public WaitObject { | 23 | class ServerPort final : public WaitObject { |
| 24 | public: | 24 | public: |
| 25 | using HLEHandler = std::shared_ptr<SessionRequestHandler>; | 25 | using HLEHandler = std::shared_ptr<SessionRequestHandler>; |
| 26 | using PortPair = std::pair<SharedPtr<ServerPort>, SharedPtr<ClientPort>>; | ||
| 26 | 27 | ||
| 27 | /** | 28 | /** |
| 28 | * Creates a pair of ServerPort and an associated ClientPort. | 29 | * Creates a pair of ServerPort and an associated ClientPort. |
| @@ -32,8 +33,8 @@ public: | |||
| 32 | * @param name Optional name of the ports | 33 | * @param name Optional name of the ports |
| 33 | * @return The created port tuple | 34 | * @return The created port tuple |
| 34 | */ | 35 | */ |
| 35 | static std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> CreatePortPair( | 36 | static PortPair CreatePortPair(KernelCore& kernel, u32 max_sessions, |
| 36 | KernelCore& kernel, u32 max_sessions, std::string name = "UnknownPort"); | 37 | std::string name = "UnknownPort"); |
| 37 | 38 | ||
| 38 | std::string GetTypeName() const override { | 39 | std::string GetTypeName() const override { |
| 39 | return "ServerPort"; | 40 | return "ServerPort"; |
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index 40cec143e..a6b2cf06a 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp | |||
| @@ -204,6 +204,6 @@ ServerSession::SessionPair ServerSession::CreateSessionPair(KernelCore& kernel, | |||
| 204 | client_session->parent = parent; | 204 | client_session->parent = parent; |
| 205 | server_session->parent = parent; | 205 | server_session->parent = parent; |
| 206 | 206 | ||
| 207 | return std::make_tuple(std::move(server_session), std::move(client_session)); | 207 | return std::make_pair(std::move(server_session), std::move(client_session)); |
| 208 | } | 208 | } |
| 209 | } // namespace Kernel | 209 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index 3429a326f..09b835ff8 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include <utility> | ||
| 9 | #include <vector> | 10 | #include <vector> |
| 10 | 11 | ||
| 11 | #include "core/hle/kernel/object.h" | 12 | #include "core/hle/kernel/object.h" |
| @@ -58,7 +59,7 @@ public: | |||
| 58 | return parent.get(); | 59 | return parent.get(); |
| 59 | } | 60 | } |
| 60 | 61 | ||
| 61 | using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; | 62 | using SessionPair = std::pair<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; |
| 62 | 63 | ||
| 63 | /** | 64 | /** |
| 64 | * Creates a pair of ServerSession and an associated ClientSession. | 65 | * Creates a pair of ServerSession and an associated ClientSession. |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 332c1037c..4e0538bc2 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -38,10 +38,6 @@ void SetCurrentPageTable(Common::PageTable* page_table) { | |||
| 38 | } | 38 | } |
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | Common::PageTable* GetCurrentPageTable() { | ||
| 42 | return current_page_table; | ||
| 43 | } | ||
| 44 | |||
| 45 | static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory, | 41 | static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory, |
| 46 | Common::PageType type) { | 42 | Common::PageType type) { |
| 47 | LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, | 43 | LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, |
diff --git a/src/core/memory.h b/src/core/memory.h index 1d38cdca8..6845f5fe1 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -28,16 +28,6 @@ constexpr u64 PAGE_MASK = PAGE_SIZE - 1; | |||
| 28 | 28 | ||
| 29 | /// Virtual user-space memory regions | 29 | /// Virtual user-space memory regions |
| 30 | enum : VAddr { | 30 | enum : VAddr { |
| 31 | /// Read-only page containing kernel and system configuration values. | ||
| 32 | CONFIG_MEMORY_VADDR = 0x1FF80000, | ||
| 33 | CONFIG_MEMORY_SIZE = 0x00001000, | ||
| 34 | CONFIG_MEMORY_VADDR_END = CONFIG_MEMORY_VADDR + CONFIG_MEMORY_SIZE, | ||
| 35 | |||
| 36 | /// Usually read-only page containing mostly values read from hardware. | ||
| 37 | SHARED_PAGE_VADDR = 0x1FF81000, | ||
| 38 | SHARED_PAGE_SIZE = 0x00001000, | ||
| 39 | SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, | ||
| 40 | |||
| 41 | /// TLS (Thread-Local Storage) related. | 31 | /// TLS (Thread-Local Storage) related. |
| 42 | TLS_ENTRY_SIZE = 0x200, | 32 | TLS_ENTRY_SIZE = 0x200, |
| 43 | 33 | ||
| @@ -50,9 +40,8 @@ enum : VAddr { | |||
| 50 | KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE, | 40 | KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE, |
| 51 | }; | 41 | }; |
| 52 | 42 | ||
| 53 | /// Currently active page table | 43 | /// Changes the currently active page table. |
| 54 | void SetCurrentPageTable(Common::PageTable* page_table); | 44 | void SetCurrentPageTable(Common::PageTable* page_table); |
| 55 | Common::PageTable* GetCurrentPageTable(); | ||
| 56 | 45 | ||
| 57 | /// Determines if the given VAddr is valid for the specified process. | 46 | /// Determines if the given VAddr is valid for the specified process. |
| 58 | bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); | 47 | bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); |
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index 3e1a735c3..58af41f6e 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp | |||
| @@ -17,7 +17,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_) | |||
| 17 | : mutable_memory(mutable_memory_), | 17 | : mutable_memory(mutable_memory_), |
| 18 | test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} { | 18 | test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} { |
| 19 | auto process = Kernel::Process::Create(Core::System::GetInstance(), ""); | 19 | auto process = Kernel::Process::Create(Core::System::GetInstance(), ""); |
| 20 | kernel.MakeCurrentProcess(process.get()); | ||
| 21 | page_table = &process->VMManager().page_table; | 20 | page_table = &process->VMManager().page_table; |
| 22 | 21 | ||
| 23 | std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); | 22 | std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); |
| @@ -28,7 +27,7 @@ TestEnvironment::TestEnvironment(bool mutable_memory_) | |||
| 28 | Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); | 27 | Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); |
| 29 | Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); | 28 | Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); |
| 30 | 29 | ||
| 31 | Memory::SetCurrentPageTable(page_table); | 30 | kernel.MakeCurrentProcess(process.get()); |
| 32 | } | 31 | } |
| 33 | 32 | ||
| 34 | TestEnvironment::~TestEnvironment() { | 33 | TestEnvironment::~TestEnvironment() { |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 30b29e14d..4461083ff 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { | |||
| 31 | 31 | ||
| 32 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { | 32 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { |
| 33 | auto& rasterizer{renderer.Rasterizer()}; | 33 | auto& rasterizer{renderer.Rasterizer()}; |
| 34 | memory_manager = std::make_unique<Tegra::MemoryManager>(); | 34 | memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer); |
| 35 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 35 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 36 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 36 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
| 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 8417324ff..0f4e820aa 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -5,16 +5,13 @@ | |||
| 5 | #include "common/alignment.h" | 5 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | #include "core/core.h" | ||
| 9 | #include "core/memory.h" | 8 | #include "core/memory.h" |
| 10 | #include "video_core/gpu.h" | ||
| 11 | #include "video_core/memory_manager.h" | 9 | #include "video_core/memory_manager.h" |
| 12 | #include "video_core/rasterizer_interface.h" | 10 | #include "video_core/rasterizer_interface.h" |
| 13 | #include "video_core/renderer_base.h" | ||
| 14 | 11 | ||
| 15 | namespace Tegra { | 12 | namespace Tegra { |
| 16 | 13 | ||
| 17 | MemoryManager::MemoryManager() { | 14 | MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} { |
| 18 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); | 15 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); |
| 19 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), | 16 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), |
| 20 | Common::PageType::Unmapped); | 17 | Common::PageType::Unmapped); |
| @@ -70,8 +67,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { | |||
| 70 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 67 | const u64 aligned_size{Common::AlignUp(size, page_size)}; |
| 71 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; | 68 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; |
| 72 | 69 | ||
| 73 | Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr, | 70 | rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); |
| 74 | aligned_size); | ||
| 75 | UnmapRange(gpu_addr, aligned_size); | 71 | UnmapRange(gpu_addr, aligned_size); |
| 76 | 72 | ||
| 77 | return gpu_addr; | 73 | return gpu_addr; |
| @@ -204,14 +200,85 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { | |||
| 204 | } | 200 | } |
| 205 | 201 | ||
| 206 | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const { | 202 | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const { |
| 207 | std::memcpy(dest_buffer, GetPointer(src_addr), size); | 203 | std::size_t remaining_size{size}; |
| 204 | std::size_t page_index{src_addr >> page_bits}; | ||
| 205 | std::size_t page_offset{src_addr & page_mask}; | ||
| 206 | |||
| 207 | while (remaining_size > 0) { | ||
| 208 | const std::size_t copy_amount{ | ||
| 209 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||
| 210 | |||
| 211 | switch (page_table.attributes[page_index]) { | ||
| 212 | case Common::PageType::Memory: { | ||
| 213 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | ||
| 214 | rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); | ||
| 215 | std::memcpy(dest_buffer, src_ptr, copy_amount); | ||
| 216 | break; | ||
| 217 | } | ||
| 218 | default: | ||
| 219 | UNREACHABLE(); | ||
| 220 | } | ||
| 221 | |||
| 222 | page_index++; | ||
| 223 | page_offset = 0; | ||
| 224 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | ||
| 225 | remaining_size -= copy_amount; | ||
| 226 | } | ||
| 208 | } | 227 | } |
| 228 | |||
| 209 | void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { | 229 | void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { |
| 210 | std::memcpy(GetPointer(dest_addr), src_buffer, size); | 230 | std::size_t remaining_size{size}; |
| 231 | std::size_t page_index{dest_addr >> page_bits}; | ||
| 232 | std::size_t page_offset{dest_addr & page_mask}; | ||
| 233 | |||
| 234 | while (remaining_size > 0) { | ||
| 235 | const std::size_t copy_amount{ | ||
| 236 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||
| 237 | |||
| 238 | switch (page_table.attributes[page_index]) { | ||
| 239 | case Common::PageType::Memory: { | ||
| 240 | u8* dest_ptr{page_table.pointers[page_index] + page_offset}; | ||
| 241 | rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); | ||
| 242 | std::memcpy(dest_ptr, src_buffer, copy_amount); | ||
| 243 | break; | ||
| 244 | } | ||
| 245 | default: | ||
| 246 | UNREACHABLE(); | ||
| 247 | } | ||
| 248 | |||
| 249 | page_index++; | ||
| 250 | page_offset = 0; | ||
| 251 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | ||
| 252 | remaining_size -= copy_amount; | ||
| 253 | } | ||
| 211 | } | 254 | } |
| 212 | 255 | ||
| 213 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { | 256 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { |
| 214 | std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size); | 257 | std::size_t remaining_size{size}; |
| 258 | std::size_t page_index{src_addr >> page_bits}; | ||
| 259 | std::size_t page_offset{src_addr & page_mask}; | ||
| 260 | |||
| 261 | while (remaining_size > 0) { | ||
| 262 | const std::size_t copy_amount{ | ||
| 263 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||
| 264 | |||
| 265 | switch (page_table.attributes[page_index]) { | ||
| 266 | case Common::PageType::Memory: { | ||
| 267 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | ||
| 268 | rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); | ||
| 269 | WriteBlock(dest_addr, src_ptr, copy_amount); | ||
| 270 | break; | ||
| 271 | } | ||
| 272 | default: | ||
| 273 | UNREACHABLE(); | ||
| 274 | } | ||
| 275 | |||
| 276 | page_index++; | ||
| 277 | page_offset = 0; | ||
| 278 | dest_addr += static_cast<VAddr>(copy_amount); | ||
| 279 | src_addr += static_cast<VAddr>(copy_amount); | ||
| 280 | remaining_size -= copy_amount; | ||
| 281 | } | ||
| 215 | } | 282 | } |
| 216 | 283 | ||
| 217 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, | 284 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, |
| @@ -351,7 +418,7 @@ MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) { | |||
| 351 | const VirtualMemoryArea& vma{vma_handle->second}; | 418 | const VirtualMemoryArea& vma{vma_handle->second}; |
| 352 | if (vma.type == VirtualMemoryArea::Type::Mapped) { | 419 | if (vma.type == VirtualMemoryArea::Type::Mapped) { |
| 353 | // Region is already allocated | 420 | // Region is already allocated |
| 354 | return {}; | 421 | return vma_handle; |
| 355 | } | 422 | } |
| 356 | 423 | ||
| 357 | const VAddr start_in_vma{base - vma.base}; | 424 | const VAddr start_in_vma{base - vma.base}; |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 178e2f655..647cbf93a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -10,6 +10,10 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/page_table.h" | 11 | #include "common/page_table.h" |
| 12 | 12 | ||
| 13 | namespace VideoCore { | ||
| 14 | class RasterizerInterface; | ||
| 15 | } | ||
| 16 | |||
| 13 | namespace Tegra { | 17 | namespace Tegra { |
| 14 | 18 | ||
| 15 | /** | 19 | /** |
| @@ -43,7 +47,7 @@ struct VirtualMemoryArea { | |||
| 43 | 47 | ||
| 44 | class MemoryManager final { | 48 | class MemoryManager final { |
| 45 | public: | 49 | public: |
| 46 | MemoryManager(); | 50 | MemoryManager(VideoCore::RasterizerInterface& rasterizer); |
| 47 | 51 | ||
| 48 | GPUVAddr AllocateSpace(u64 size, u64 align); | 52 | GPUVAddr AllocateSpace(u64 size, u64 align); |
| 49 | GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align); | 53 | GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align); |
| @@ -144,6 +148,7 @@ private: | |||
| 144 | 148 | ||
| 145 | Common::PageTable page_table{page_bits}; | 149 | Common::PageTable page_table{page_bits}; |
| 146 | VMAMap vma_map; | 150 | VMAMap vma_map; |
| 151 | VideoCore::RasterizerInterface& rasterizer; | ||
| 147 | }; | 152 | }; |
| 148 | 153 | ||
| 149 | } // namespace Tegra | 154 | } // namespace Tegra |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index a1a51f226..3ea08ef7b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -21,6 +21,8 @@ | |||
| 21 | 21 | ||
| 22 | namespace OpenGL::GLShader { | 22 | namespace OpenGL::GLShader { |
| 23 | 23 | ||
| 24 | namespace { | ||
| 25 | |||
| 24 | using Tegra::Shader::Attribute; | 26 | using Tegra::Shader::Attribute; |
| 25 | using Tegra::Shader::AttributeUse; | 27 | using Tegra::Shader::AttributeUse; |
| 26 | using Tegra::Shader::Header; | 28 | using Tegra::Shader::Header; |
| @@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 34 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | 36 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; |
| 35 | using Operation = const OperationNode&; | 37 | using Operation = const OperationNode&; |
| 36 | 38 | ||
| 39 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||
| 40 | |||
| 41 | struct TextureAoffi {}; | ||
| 42 | using TextureArgument = std::pair<Type, Node>; | ||
| 43 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; | ||
| 44 | |||
| 37 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; | 45 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; |
| 38 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 46 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 39 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | 47 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); |
| 40 | constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = | 48 | constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = |
| 41 | static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); | 49 | static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); |
| 42 | 50 | ||
| 43 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||
| 44 | |||
| 45 | class ShaderWriter { | 51 | class ShaderWriter { |
| 46 | public: | 52 | public: |
| 47 | void AddExpression(std::string_view text) { | 53 | void AddExpression(std::string_view text) { |
| @@ -91,7 +97,7 @@ private: | |||
| 91 | }; | 97 | }; |
| 92 | 98 | ||
| 93 | /// Generates code to use for a swizzle operation. | 99 | /// Generates code to use for a swizzle operation. |
| 94 | static std::string GetSwizzle(u32 elem) { | 100 | std::string GetSwizzle(u32 elem) { |
| 95 | ASSERT(elem <= 3); | 101 | ASSERT(elem <= 3); |
| 96 | std::string swizzle = "."; | 102 | std::string swizzle = "."; |
| 97 | swizzle += "xyzw"[elem]; | 103 | swizzle += "xyzw"[elem]; |
| @@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) { | |||
| 99 | } | 105 | } |
| 100 | 106 | ||
| 101 | /// Translate topology | 107 | /// Translate topology |
| 102 | static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | 108 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { |
| 103 | switch (topology) { | 109 | switch (topology) { |
| 104 | case Tegra::Shader::OutputTopology::PointList: | 110 | case Tegra::Shader::OutputTopology::PointList: |
| 105 | return "points"; | 111 | return "points"; |
| @@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | |||
| 114 | } | 120 | } |
| 115 | 121 | ||
| 116 | /// Returns true if an object has to be treated as precise | 122 | /// Returns true if an object has to be treated as precise |
| 117 | static bool IsPrecise(Operation operand) { | 123 | bool IsPrecise(Operation operand) { |
| 118 | const auto& meta = operand.GetMeta(); | 124 | const auto& meta = operand.GetMeta(); |
| 119 | 125 | ||
| 120 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { | 126 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { |
| @@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) { | |||
| 126 | return false; | 132 | return false; |
| 127 | } | 133 | } |
| 128 | 134 | ||
| 129 | static bool IsPrecise(Node node) { | 135 | bool IsPrecise(Node node) { |
| 130 | if (const auto operation = std::get_if<OperationNode>(node)) { | 136 | if (const auto operation = std::get_if<OperationNode>(node)) { |
| 131 | return IsPrecise(*operation); | 137 | return IsPrecise(*operation); |
| 132 | } | 138 | } |
| @@ -723,8 +729,8 @@ private: | |||
| 723 | result_type)); | 729 | result_type)); |
| 724 | } | 730 | } |
| 725 | 731 | ||
| 726 | std::string GenerateTexture(Operation operation, const std::string& func, | 732 | std::string GenerateTexture(Operation operation, const std::string& function_suffix, |
| 727 | const std::vector<std::pair<Type, Node>>& extras) { | 733 | const std::vector<TextureIR>& extras) { |
| 728 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; | 734 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; |
| 729 | 735 | ||
| 730 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 736 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| @@ -734,11 +740,11 @@ private: | |||
| 734 | const bool has_array = meta->sampler.IsArray(); | 740 | const bool has_array = meta->sampler.IsArray(); |
| 735 | const bool has_shadow = meta->sampler.IsShadow(); | 741 | const bool has_shadow = meta->sampler.IsShadow(); |
| 736 | 742 | ||
| 737 | std::string expr = func; | 743 | std::string expr = "texture" + function_suffix; |
| 738 | expr += '('; | 744 | if (!meta->aoffi.empty()) { |
| 739 | expr += GetSampler(meta->sampler); | 745 | expr += "Offset"; |
| 740 | expr += ", "; | 746 | } |
| 741 | 747 | expr += '(' + GetSampler(meta->sampler) + ", "; | |
| 742 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); | 748 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); |
| 743 | expr += '('; | 749 | expr += '('; |
| 744 | for (std::size_t i = 0; i < count; ++i) { | 750 | for (std::size_t i = 0; i < count; ++i) { |
| @@ -756,36 +762,74 @@ private: | |||
| 756 | } | 762 | } |
| 757 | expr += ')'; | 763 | expr += ')'; |
| 758 | 764 | ||
| 759 | for (const auto& extra_pair : extras) { | 765 | for (const auto& variant : extras) { |
| 760 | const auto [type, operand] = extra_pair; | 766 | if (const auto argument = std::get_if<TextureArgument>(&variant)) { |
| 761 | if (operand == nullptr) { | 767 | expr += GenerateTextureArgument(*argument); |
| 762 | continue; | 768 | } else if (std::get_if<TextureAoffi>(&variant)) { |
| 769 | expr += GenerateTextureAoffi(meta->aoffi); | ||
| 770 | } else { | ||
| 771 | UNREACHABLE(); | ||
| 763 | } | 772 | } |
| 764 | expr += ", "; | 773 | } |
| 765 | 774 | ||
| 766 | switch (type) { | 775 | return expr + ')'; |
| 767 | case Type::Int: | 776 | } |
| 768 | if (const auto immediate = std::get_if<ImmediateNode>(operand)) { | 777 | |
| 769 | // Inline the string as an immediate integer in GLSL (some extra arguments are | 778 | std::string GenerateTextureArgument(TextureArgument argument) { |
| 770 | // required to be constant) | 779 | const auto [type, operand] = argument; |
| 771 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | 780 | if (operand == nullptr) { |
| 772 | } else { | 781 | return {}; |
| 773 | expr += "ftoi(" + Visit(operand) + ')'; | 782 | } |
| 774 | } | 783 | |
| 775 | break; | 784 | std::string expr = ", "; |
| 776 | case Type::Float: | 785 | switch (type) { |
| 777 | expr += Visit(operand); | 786 | case Type::Int: |
| 778 | break; | 787 | if (const auto immediate = std::get_if<ImmediateNode>(operand)) { |
| 779 | default: { | 788 | // Inline the string as an immediate integer in GLSL (some extra arguments are |
| 780 | const auto type_int = static_cast<u32>(type); | 789 | // required to be constant) |
| 781 | UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); | 790 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); |
| 782 | expr += '0'; | 791 | } else { |
| 783 | break; | 792 | expr += "ftoi(" + Visit(operand) + ')'; |
| 793 | } | ||
| 794 | break; | ||
| 795 | case Type::Float: | ||
| 796 | expr += Visit(operand); | ||
| 797 | break; | ||
| 798 | default: { | ||
| 799 | const auto type_int = static_cast<u32>(type); | ||
| 800 | UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); | ||
| 801 | expr += '0'; | ||
| 802 | break; | ||
| 803 | } | ||
| 804 | } | ||
| 805 | return expr; | ||
| 806 | } | ||
| 807 | |||
| 808 | std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { | ||
| 809 | if (aoffi.empty()) { | ||
| 810 | return {}; | ||
| 811 | } | ||
| 812 | constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"}; | ||
| 813 | std::string expr = ", "; | ||
| 814 | expr += coord_constructors.at(aoffi.size() - 1); | ||
| 815 | expr += '('; | ||
| 816 | |||
| 817 | for (std::size_t index = 0; index < aoffi.size(); ++index) { | ||
| 818 | const auto operand{aoffi.at(index)}; | ||
| 819 | if (const auto immediate = std::get_if<ImmediateNode>(operand)) { | ||
| 820 | // Inline the string as an immediate integer in GLSL (AOFFI arguments are required | ||
| 821 | // to be constant by the standard). | ||
| 822 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 823 | } else { | ||
| 824 | expr += "ftoi(" + Visit(operand) + ')'; | ||
| 784 | } | 825 | } |
| 826 | if (index + 1 < aoffi.size()) { | ||
| 827 | expr += ", "; | ||
| 785 | } | 828 | } |
| 786 | } | 829 | } |
| 830 | expr += ')'; | ||
| 787 | 831 | ||
| 788 | return expr + ')'; | 832 | return expr; |
| 789 | } | 833 | } |
| 790 | 834 | ||
| 791 | std::string Assign(Operation operation) { | 835 | std::string Assign(Operation operation) { |
| @@ -1164,7 +1208,8 @@ private: | |||
| 1164 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1208 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1165 | ASSERT(meta); | 1209 | ASSERT(meta); |
| 1166 | 1210 | ||
| 1167 | std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); | 1211 | std::string expr = GenerateTexture( |
| 1212 | operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}}); | ||
| 1168 | if (meta->sampler.IsShadow()) { | 1213 | if (meta->sampler.IsShadow()) { |
| 1169 | expr = "vec4(" + expr + ')'; | 1214 | expr = "vec4(" + expr + ')'; |
| 1170 | } | 1215 | } |
| @@ -1175,7 +1220,8 @@ private: | |||
| 1175 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1220 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1176 | ASSERT(meta); | 1221 | ASSERT(meta); |
| 1177 | 1222 | ||
| 1178 | std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); | 1223 | std::string expr = GenerateTexture( |
| 1224 | operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}}); | ||
| 1179 | if (meta->sampler.IsShadow()) { | 1225 | if (meta->sampler.IsShadow()) { |
| 1180 | expr = "vec4(" + expr + ')'; | 1226 | expr = "vec4(" + expr + ')'; |
| 1181 | } | 1227 | } |
| @@ -1187,7 +1233,8 @@ private: | |||
| 1187 | ASSERT(meta); | 1233 | ASSERT(meta); |
| 1188 | 1234 | ||
| 1189 | const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; | 1235 | const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; |
| 1190 | return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + | 1236 | return GenerateTexture(operation, "Gather", |
| 1237 | {TextureArgument{type, meta->component}, TextureAoffi{}}) + | ||
| 1191 | GetSwizzle(meta->element); | 1238 | GetSwizzle(meta->element); |
| 1192 | } | 1239 | } |
| 1193 | 1240 | ||
| @@ -1217,8 +1264,8 @@ private: | |||
| 1217 | ASSERT(meta); | 1264 | ASSERT(meta); |
| 1218 | 1265 | ||
| 1219 | if (meta->element < 2) { | 1266 | if (meta->element < 2) { |
| 1220 | return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + | 1267 | return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" + |
| 1221 | " * vec2(256))" + GetSwizzle(meta->element) + "))"; | 1268 | GetSwizzle(meta->element) + "))"; |
| 1222 | } | 1269 | } |
| 1223 | return "0"; | 1270 | return "0"; |
| 1224 | } | 1271 | } |
| @@ -1571,6 +1618,8 @@ private: | |||
| 1571 | ShaderWriter code; | 1618 | ShaderWriter code; |
| 1572 | }; | 1619 | }; |
| 1573 | 1620 | ||
| 1621 | } // Anonymous namespace | ||
| 1622 | |||
| 1574 | std::string GetCommonDeclarations() { | 1623 | std::string GetCommonDeclarations() { |
| 1575 | const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); | 1624 | const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); |
| 1576 | const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); | 1625 | const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index d2d979997..8a43eb157 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -10,8 +10,8 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/file_util.h" | 11 | #include "common/file_util.h" |
| 12 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 13 | #include "common/lz4_compression.h" | ||
| 14 | #include "common/scm_rev.h" | 13 | #include "common/scm_rev.h" |
| 14 | #include "common/zstd_compression.h" | ||
| 15 | 15 | ||
| 16 | #include "core/core.h" | 16 | #include "core/core.h" |
| 17 | #include "core/hle/kernel/process.h" | 17 | #include "core/hle/kernel/process.h" |
| @@ -259,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | |||
| 259 | return {}; | 259 | return {}; |
| 260 | } | 260 | } |
| 261 | 261 | ||
| 262 | dump.binary = Common::Compression::DecompressDataLZ4(compressed_binary, binary_length); | 262 | dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary); |
| 263 | if (dump.binary.empty()) { | 263 | if (dump.binary.empty()) { |
| 264 | return {}; | 264 | return {}; |
| 265 | } | 265 | } |
| @@ -288,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 288 | return {}; | 288 | return {}; |
| 289 | } | 289 | } |
| 290 | 290 | ||
| 291 | const std::vector<u8> code = Common::Compression::DecompressDataLZ4(compressed_code, code_size); | 291 | const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code); |
| 292 | if (code.empty()) { | 292 | if (code.empty()) { |
| 293 | return {}; | 293 | return {}; |
| 294 | } | 294 | } |
| @@ -474,8 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str | |||
| 474 | if (!IsUsable()) | 474 | if (!IsUsable()) |
| 475 | return; | 475 | return; |
| 476 | 476 | ||
| 477 | const std::vector<u8> compressed_code{Common::Compression::CompressDataLZ4HC( | 477 | const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault( |
| 478 | reinterpret_cast<const u8*>(code.data()), code.size(), 9)}; | 478 | reinterpret_cast<const u8*>(code.data()), code.size())}; |
| 479 | if (compressed_code.empty()) { | 479 | if (compressed_code.empty()) { |
| 480 | LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}", | 480 | LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}", |
| 481 | unique_identifier); | 481 | unique_identifier); |
| @@ -506,7 +506,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||
| 506 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); | 506 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); |
| 507 | 507 | ||
| 508 | const std::vector<u8> compressed_binary = | 508 | const std::vector<u8> compressed_binary = |
| 509 | Common::Compression::CompressDataLZ4HC(binary.data(), binary.size(), 9); | 509 | Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size()); |
| 510 | 510 | ||
| 511 | if (compressed_binary.empty()) { | 511 | if (compressed_binary.empty()) { |
| 512 | LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}", | 512 | LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}", |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 8eef2a920..37dcfefdb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -62,7 +62,6 @@ public: | |||
| 62 | UpdatePipeline(); | 62 | UpdatePipeline(); |
| 63 | state.draw.shader_program = 0; | 63 | state.draw.shader_program = 0; |
| 64 | state.draw.program_pipeline = pipeline.handle; | 64 | state.draw.program_pipeline = pipeline.handle; |
| 65 | state.geometry_shaders.enabled = (gs != 0); | ||
| 66 | } | 65 | } |
| 67 | 66 | ||
| 68 | private: | 67 | private: |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 9419326a3..52d569a1b 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -10,16 +10,62 @@ | |||
| 10 | 10 | ||
| 11 | namespace OpenGL { | 11 | namespace OpenGL { |
| 12 | 12 | ||
| 13 | OpenGLState OpenGLState::cur_state; | 13 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 14 | 14 | ||
| 15 | OpenGLState OpenGLState::cur_state; | ||
| 15 | bool OpenGLState::s_rgb_used; | 16 | bool OpenGLState::s_rgb_used; |
| 16 | 17 | ||
| 18 | namespace { | ||
| 19 | |||
| 20 | template <typename T> | ||
| 21 | bool UpdateValue(T& current_value, const T new_value) { | ||
| 22 | const bool changed = current_value != new_value; | ||
| 23 | current_value = new_value; | ||
| 24 | return changed; | ||
| 25 | } | ||
| 26 | |||
| 27 | template <typename T1, typename T2> | ||
| 28 | bool UpdateTie(T1 current_value, const T2 new_value) { | ||
| 29 | const bool changed = current_value != new_value; | ||
| 30 | current_value = new_value; | ||
| 31 | return changed; | ||
| 32 | } | ||
| 33 | |||
| 34 | void Enable(GLenum cap, bool enable) { | ||
| 35 | if (enable) { | ||
| 36 | glEnable(cap); | ||
| 37 | } else { | ||
| 38 | glDisable(cap); | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | void Enable(GLenum cap, GLuint index, bool enable) { | ||
| 43 | if (enable) { | ||
| 44 | glEnablei(cap, index); | ||
| 45 | } else { | ||
| 46 | glDisablei(cap, index); | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | void Enable(GLenum cap, bool& current_value, bool new_value) { | ||
| 51 | if (UpdateValue(current_value, new_value)) | ||
| 52 | Enable(cap, new_value); | ||
| 53 | } | ||
| 54 | |||
| 55 | void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) { | ||
| 56 | if (UpdateValue(current_value, new_value)) | ||
| 57 | Enable(cap, index, new_value); | ||
| 58 | } | ||
| 59 | |||
| 60 | } // namespace | ||
| 61 | |||
| 17 | OpenGLState::OpenGLState() { | 62 | OpenGLState::OpenGLState() { |
| 18 | // These all match default OpenGL values | 63 | // These all match default OpenGL values |
| 19 | geometry_shaders.enabled = false; | ||
| 20 | framebuffer_srgb.enabled = false; | 64 | framebuffer_srgb.enabled = false; |
| 65 | |||
| 21 | multisample_control.alpha_to_coverage = false; | 66 | multisample_control.alpha_to_coverage = false; |
| 22 | multisample_control.alpha_to_one = false; | 67 | multisample_control.alpha_to_one = false; |
| 68 | |||
| 23 | cull.enabled = false; | 69 | cull.enabled = false; |
| 24 | cull.mode = GL_BACK; | 70 | cull.mode = GL_BACK; |
| 25 | cull.front_face = GL_CCW; | 71 | cull.front_face = GL_CCW; |
| @@ -30,14 +76,15 @@ OpenGLState::OpenGLState() { | |||
| 30 | 76 | ||
| 31 | primitive_restart.enabled = false; | 77 | primitive_restart.enabled = false; |
| 32 | primitive_restart.index = 0; | 78 | primitive_restart.index = 0; |
| 79 | |||
| 33 | for (auto& item : color_mask) { | 80 | for (auto& item : color_mask) { |
| 34 | item.red_enabled = GL_TRUE; | 81 | item.red_enabled = GL_TRUE; |
| 35 | item.green_enabled = GL_TRUE; | 82 | item.green_enabled = GL_TRUE; |
| 36 | item.blue_enabled = GL_TRUE; | 83 | item.blue_enabled = GL_TRUE; |
| 37 | item.alpha_enabled = GL_TRUE; | 84 | item.alpha_enabled = GL_TRUE; |
| 38 | } | 85 | } |
| 39 | stencil.test_enabled = false; | 86 | |
| 40 | auto reset_stencil = [](auto& config) { | 87 | const auto ResetStencil = [](auto& config) { |
| 41 | config.test_func = GL_ALWAYS; | 88 | config.test_func = GL_ALWAYS; |
| 42 | config.test_ref = 0; | 89 | config.test_ref = 0; |
| 43 | config.test_mask = 0xFFFFFFFF; | 90 | config.test_mask = 0xFFFFFFFF; |
| @@ -46,8 +93,10 @@ OpenGLState::OpenGLState() { | |||
| 46 | config.action_depth_pass = GL_KEEP; | 93 | config.action_depth_pass = GL_KEEP; |
| 47 | config.action_stencil_fail = GL_KEEP; | 94 | config.action_stencil_fail = GL_KEEP; |
| 48 | }; | 95 | }; |
| 49 | reset_stencil(stencil.front); | 96 | stencil.test_enabled = false; |
| 50 | reset_stencil(stencil.back); | 97 | ResetStencil(stencil.front); |
| 98 | ResetStencil(stencil.back); | ||
| 99 | |||
| 51 | for (auto& item : viewports) { | 100 | for (auto& item : viewports) { |
| 52 | item.x = 0; | 101 | item.x = 0; |
| 53 | item.y = 0; | 102 | item.y = 0; |
| @@ -61,6 +110,7 @@ OpenGLState::OpenGLState() { | |||
| 61 | item.scissor.width = 0; | 110 | item.scissor.width = 0; |
| 62 | item.scissor.height = 0; | 111 | item.scissor.height = 0; |
| 63 | } | 112 | } |
| 113 | |||
| 64 | for (auto& item : blend) { | 114 | for (auto& item : blend) { |
| 65 | item.enabled = true; | 115 | item.enabled = true; |
| 66 | item.rgb_equation = GL_FUNC_ADD; | 116 | item.rgb_equation = GL_FUNC_ADD; |
| @@ -70,11 +120,14 @@ OpenGLState::OpenGLState() { | |||
| 70 | item.src_a_func = GL_ONE; | 120 | item.src_a_func = GL_ONE; |
| 71 | item.dst_a_func = GL_ZERO; | 121 | item.dst_a_func = GL_ZERO; |
| 72 | } | 122 | } |
| 123 | |||
| 73 | independant_blend.enabled = false; | 124 | independant_blend.enabled = false; |
| 125 | |||
| 74 | blend_color.red = 0.0f; | 126 | blend_color.red = 0.0f; |
| 75 | blend_color.green = 0.0f; | 127 | blend_color.green = 0.0f; |
| 76 | blend_color.blue = 0.0f; | 128 | blend_color.blue = 0.0f; |
| 77 | blend_color.alpha = 0.0f; | 129 | blend_color.alpha = 0.0f; |
| 130 | |||
| 78 | logic_op.enabled = false; | 131 | logic_op.enabled = false; |
| 79 | logic_op.operation = GL_COPY; | 132 | logic_op.operation = GL_COPY; |
| 80 | 133 | ||
| @@ -91,9 +144,12 @@ OpenGLState::OpenGLState() { | |||
| 91 | clip_distance = {}; | 144 | clip_distance = {}; |
| 92 | 145 | ||
| 93 | point.size = 1; | 146 | point.size = 1; |
| 147 | |||
| 94 | fragment_color_clamp.enabled = false; | 148 | fragment_color_clamp.enabled = false; |
| 149 | |||
| 95 | depth_clamp.far_plane = false; | 150 | depth_clamp.far_plane = false; |
| 96 | depth_clamp.near_plane = false; | 151 | depth_clamp.near_plane = false; |
| 152 | |||
| 97 | polygon_offset.fill_enable = false; | 153 | polygon_offset.fill_enable = false; |
| 98 | polygon_offset.line_enable = false; | 154 | polygon_offset.line_enable = false; |
| 99 | polygon_offset.point_enable = false; | 155 | polygon_offset.point_enable = false; |
| @@ -103,260 +159,255 @@ OpenGLState::OpenGLState() { | |||
| 103 | } | 159 | } |
| 104 | 160 | ||
| 105 | void OpenGLState::ApplyDefaultState() { | 161 | void OpenGLState::ApplyDefaultState() { |
| 162 | glEnable(GL_BLEND); | ||
| 106 | glDisable(GL_FRAMEBUFFER_SRGB); | 163 | glDisable(GL_FRAMEBUFFER_SRGB); |
| 107 | glDisable(GL_CULL_FACE); | 164 | glDisable(GL_CULL_FACE); |
| 108 | glDisable(GL_DEPTH_TEST); | 165 | glDisable(GL_DEPTH_TEST); |
| 109 | glDisable(GL_PRIMITIVE_RESTART); | 166 | glDisable(GL_PRIMITIVE_RESTART); |
| 110 | glDisable(GL_STENCIL_TEST); | 167 | glDisable(GL_STENCIL_TEST); |
| 111 | glEnable(GL_BLEND); | ||
| 112 | glDisable(GL_COLOR_LOGIC_OP); | 168 | glDisable(GL_COLOR_LOGIC_OP); |
| 113 | glDisable(GL_SCISSOR_TEST); | 169 | glDisable(GL_SCISSOR_TEST); |
| 114 | } | 170 | } |
| 115 | 171 | ||
| 172 | void OpenGLState::ApplyFramebufferState() const { | ||
| 173 | if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) { | ||
| 174 | glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); | ||
| 175 | } | ||
| 176 | if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) { | ||
| 177 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); | ||
| 178 | } | ||
| 179 | } | ||
| 180 | |||
| 181 | void OpenGLState::ApplyVertexArrayState() const { | ||
| 182 | if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) { | ||
| 183 | glBindVertexArray(draw.vertex_array); | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | void OpenGLState::ApplyShaderProgram() const { | ||
| 188 | if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) { | ||
| 189 | glUseProgram(draw.shader_program); | ||
| 190 | } | ||
| 191 | } | ||
| 192 | |||
| 193 | void OpenGLState::ApplyProgramPipeline() const { | ||
| 194 | if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) { | ||
| 195 | glBindProgramPipeline(draw.program_pipeline); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | |||
| 199 | void OpenGLState::ApplyClipDistances() const { | ||
| 200 | for (std::size_t i = 0; i < clip_distance.size(); ++i) { | ||
| 201 | Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i], | ||
| 202 | clip_distance[i]); | ||
| 203 | } | ||
| 204 | } | ||
| 205 | |||
| 206 | void OpenGLState::ApplyPointSize() const { | ||
| 207 | if (UpdateValue(cur_state.point.size, point.size)) { | ||
| 208 | glPointSize(point.size); | ||
| 209 | } | ||
| 210 | } | ||
| 211 | |||
| 212 | void OpenGLState::ApplyFragmentColorClamp() const { | ||
| 213 | if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) { | ||
| 214 | glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, | ||
| 215 | fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); | ||
| 216 | } | ||
| 217 | } | ||
| 218 | |||
| 219 | void OpenGLState::ApplyMultisample() const { | ||
| 220 | Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage, | ||
| 221 | multisample_control.alpha_to_coverage); | ||
| 222 | Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one, | ||
| 223 | multisample_control.alpha_to_one); | ||
| 224 | } | ||
| 225 | |||
| 226 | void OpenGLState::ApplyDepthClamp() const { | ||
| 227 | if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && | ||
| 228 | depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { | ||
| 229 | return; | ||
| 230 | } | ||
| 231 | cur_state.depth_clamp = depth_clamp; | ||
| 232 | |||
| 233 | UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, | ||
| 234 | "Unimplemented Depth Clamp Separation!"); | ||
| 235 | |||
| 236 | Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane); | ||
| 237 | } | ||
| 238 | |||
| 116 | void OpenGLState::ApplySRgb() const { | 239 | void OpenGLState::ApplySRgb() const { |
| 117 | if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { | 240 | if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled) |
| 118 | if (framebuffer_srgb.enabled) { | 241 | return; |
| 119 | // Track if sRGB is used | 242 | cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled; |
| 120 | s_rgb_used = true; | 243 | if (framebuffer_srgb.enabled) { |
| 121 | glEnable(GL_FRAMEBUFFER_SRGB); | 244 | // Track if sRGB is used |
| 122 | } else { | 245 | s_rgb_used = true; |
| 123 | glDisable(GL_FRAMEBUFFER_SRGB); | 246 | glEnable(GL_FRAMEBUFFER_SRGB); |
| 124 | } | 247 | } else { |
| 248 | glDisable(GL_FRAMEBUFFER_SRGB); | ||
| 125 | } | 249 | } |
| 126 | } | 250 | } |
| 127 | 251 | ||
| 128 | void OpenGLState::ApplyCulling() const { | 252 | void OpenGLState::ApplyCulling() const { |
| 129 | if (cull.enabled != cur_state.cull.enabled) { | 253 | Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled); |
| 130 | if (cull.enabled) { | ||
| 131 | glEnable(GL_CULL_FACE); | ||
| 132 | } else { | ||
| 133 | glDisable(GL_CULL_FACE); | ||
| 134 | } | ||
| 135 | } | ||
| 136 | 254 | ||
| 137 | if (cull.mode != cur_state.cull.mode) { | 255 | if (UpdateValue(cur_state.cull.mode, cull.mode)) { |
| 138 | glCullFace(cull.mode); | 256 | glCullFace(cull.mode); |
| 139 | } | 257 | } |
| 140 | 258 | ||
| 141 | if (cull.front_face != cur_state.cull.front_face) { | 259 | if (UpdateValue(cur_state.cull.front_face, cull.front_face)) { |
| 142 | glFrontFace(cull.front_face); | 260 | glFrontFace(cull.front_face); |
| 143 | } | 261 | } |
| 144 | } | 262 | } |
| 145 | 263 | ||
| 146 | void OpenGLState::ApplyColorMask() const { | 264 | void OpenGLState::ApplyColorMask() const { |
| 147 | if (independant_blend.enabled) { | 265 | for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { |
| 148 | for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | 266 | const auto& updated = color_mask[i]; |
| 149 | const auto& updated = color_mask[i]; | 267 | auto& current = cur_state.color_mask[i]; |
| 150 | const auto& current = cur_state.color_mask[i]; | ||
| 151 | if (updated.red_enabled != current.red_enabled || | ||
| 152 | updated.green_enabled != current.green_enabled || | ||
| 153 | updated.blue_enabled != current.blue_enabled || | ||
| 154 | updated.alpha_enabled != current.alpha_enabled) { | ||
| 155 | glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, | ||
| 156 | updated.blue_enabled, updated.alpha_enabled); | ||
| 157 | } | ||
| 158 | } | ||
| 159 | } else { | ||
| 160 | const auto& updated = color_mask[0]; | ||
| 161 | const auto& current = cur_state.color_mask[0]; | ||
| 162 | if (updated.red_enabled != current.red_enabled || | 268 | if (updated.red_enabled != current.red_enabled || |
| 163 | updated.green_enabled != current.green_enabled || | 269 | updated.green_enabled != current.green_enabled || |
| 164 | updated.blue_enabled != current.blue_enabled || | 270 | updated.blue_enabled != current.blue_enabled || |
| 165 | updated.alpha_enabled != current.alpha_enabled) { | 271 | updated.alpha_enabled != current.alpha_enabled) { |
| 166 | glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled, | 272 | current = updated; |
| 167 | updated.alpha_enabled); | 273 | glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, |
| 274 | updated.blue_enabled, updated.alpha_enabled); | ||
| 168 | } | 275 | } |
| 169 | } | 276 | } |
| 170 | } | 277 | } |
| 171 | 278 | ||
| 172 | void OpenGLState::ApplyDepth() const { | 279 | void OpenGLState::ApplyDepth() const { |
| 173 | if (depth.test_enabled != cur_state.depth.test_enabled) { | 280 | Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled); |
| 174 | if (depth.test_enabled) { | ||
| 175 | glEnable(GL_DEPTH_TEST); | ||
| 176 | } else { | ||
| 177 | glDisable(GL_DEPTH_TEST); | ||
| 178 | } | ||
| 179 | } | ||
| 180 | 281 | ||
| 181 | if (depth.test_func != cur_state.depth.test_func) { | 282 | if (cur_state.depth.test_func != depth.test_func) { |
| 283 | cur_state.depth.test_func = depth.test_func; | ||
| 182 | glDepthFunc(depth.test_func); | 284 | glDepthFunc(depth.test_func); |
| 183 | } | 285 | } |
| 184 | 286 | ||
| 185 | if (depth.write_mask != cur_state.depth.write_mask) { | 287 | if (cur_state.depth.write_mask != depth.write_mask) { |
| 288 | cur_state.depth.write_mask = depth.write_mask; | ||
| 186 | glDepthMask(depth.write_mask); | 289 | glDepthMask(depth.write_mask); |
| 187 | } | 290 | } |
| 188 | } | 291 | } |
| 189 | 292 | ||
| 190 | void OpenGLState::ApplyPrimitiveRestart() const { | 293 | void OpenGLState::ApplyPrimitiveRestart() const { |
| 191 | if (primitive_restart.enabled != cur_state.primitive_restart.enabled) { | 294 | Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled); |
| 192 | if (primitive_restart.enabled) { | ||
| 193 | glEnable(GL_PRIMITIVE_RESTART); | ||
| 194 | } else { | ||
| 195 | glDisable(GL_PRIMITIVE_RESTART); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | 295 | ||
| 199 | if (primitive_restart.index != cur_state.primitive_restart.index) { | 296 | if (cur_state.primitive_restart.index != primitive_restart.index) { |
| 297 | cur_state.primitive_restart.index = primitive_restart.index; | ||
| 200 | glPrimitiveRestartIndex(primitive_restart.index); | 298 | glPrimitiveRestartIndex(primitive_restart.index); |
| 201 | } | 299 | } |
| 202 | } | 300 | } |
| 203 | 301 | ||
| 204 | void OpenGLState::ApplyStencilTest() const { | 302 | void OpenGLState::ApplyStencilTest() const { |
| 205 | if (stencil.test_enabled != cur_state.stencil.test_enabled) { | 303 | Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled); |
| 206 | if (stencil.test_enabled) { | 304 | |
| 207 | glEnable(GL_STENCIL_TEST); | 305 | const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) { |
| 208 | } else { | 306 | if (current.test_func != config.test_func || current.test_ref != config.test_ref || |
| 209 | glDisable(GL_STENCIL_TEST); | 307 | current.test_mask != config.test_mask) { |
| 210 | } | 308 | current.test_func = config.test_func; |
| 211 | } | 309 | current.test_ref = config.test_ref; |
| 212 | 310 | current.test_mask = config.test_mask; | |
| 213 | const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) { | ||
| 214 | if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref || | ||
| 215 | config.test_mask != prev_config.test_mask) { | ||
| 216 | glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); | 311 | glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); |
| 217 | } | 312 | } |
| 218 | if (config.action_depth_fail != prev_config.action_depth_fail || | 313 | if (current.action_depth_fail != config.action_depth_fail || |
| 219 | config.action_depth_pass != prev_config.action_depth_pass || | 314 | current.action_depth_pass != config.action_depth_pass || |
| 220 | config.action_stencil_fail != prev_config.action_stencil_fail) { | 315 | current.action_stencil_fail != config.action_stencil_fail) { |
| 316 | current.action_depth_fail = config.action_depth_fail; | ||
| 317 | current.action_depth_pass = config.action_depth_pass; | ||
| 318 | current.action_stencil_fail = config.action_stencil_fail; | ||
| 221 | glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, | 319 | glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, |
| 222 | config.action_depth_pass); | 320 | config.action_depth_pass); |
| 223 | } | 321 | } |
| 224 | if (config.write_mask != prev_config.write_mask) { | 322 | if (current.write_mask != config.write_mask) { |
| 323 | current.write_mask = config.write_mask; | ||
| 225 | glStencilMaskSeparate(face, config.write_mask); | 324 | glStencilMaskSeparate(face, config.write_mask); |
| 226 | } | 325 | } |
| 227 | }; | 326 | }; |
| 228 | ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front); | 327 | ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front); |
| 229 | ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); | 328 | ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); |
| 230 | } | 329 | } |
| 231 | // Viewport does not affects glClearBuffer so emulate viewport using scissor test | ||
| 232 | void OpenGLState::EmulateViewportWithScissor() { | ||
| 233 | auto& current = viewports[0]; | ||
| 234 | if (current.scissor.enabled) { | ||
| 235 | const GLint left = std::max(current.x, current.scissor.x); | ||
| 236 | const GLint right = | ||
| 237 | std::max(current.x + current.width, current.scissor.x + current.scissor.width); | ||
| 238 | const GLint bottom = std::max(current.y, current.scissor.y); | ||
| 239 | const GLint top = | ||
| 240 | std::max(current.y + current.height, current.scissor.y + current.scissor.height); | ||
| 241 | current.scissor.x = std::max(left, 0); | ||
| 242 | current.scissor.y = std::max(bottom, 0); | ||
| 243 | current.scissor.width = std::max(right - left, 0); | ||
| 244 | current.scissor.height = std::max(top - bottom, 0); | ||
| 245 | } else { | ||
| 246 | current.scissor.enabled = true; | ||
| 247 | current.scissor.x = current.x; | ||
| 248 | current.scissor.y = current.y; | ||
| 249 | current.scissor.width = current.width; | ||
| 250 | current.scissor.height = current.height; | ||
| 251 | } | ||
| 252 | } | ||
| 253 | 330 | ||
| 254 | void OpenGLState::ApplyViewport() const { | 331 | void OpenGLState::ApplyViewport() const { |
| 255 | if (geometry_shaders.enabled) { | 332 | for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) { |
| 256 | for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports); | 333 | const auto& updated = viewports[i]; |
| 257 | i++) { | 334 | auto& current = cur_state.viewports[i]; |
| 258 | const auto& current = cur_state.viewports[i]; | 335 | |
| 259 | const auto& updated = viewports[i]; | 336 | if (current.x != updated.x || current.y != updated.y || current.width != updated.width || |
| 260 | if (updated.x != current.x || updated.y != current.y || | 337 | current.height != updated.height) { |
| 261 | updated.width != current.width || updated.height != current.height) { | 338 | current.x = updated.x; |
| 262 | glViewportIndexedf( | 339 | current.y = updated.y; |
| 263 | i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), | 340 | current.width = updated.width; |
| 264 | static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height)); | 341 | current.height = updated.height; |
| 265 | } | 342 | glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), |
| 266 | if (updated.depth_range_near != current.depth_range_near || | 343 | static_cast<GLfloat>(updated.width), |
| 267 | updated.depth_range_far != current.depth_range_far) { | 344 | static_cast<GLfloat>(updated.height)); |
| 268 | glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); | ||
| 269 | } | ||
| 270 | |||
| 271 | if (updated.scissor.enabled != current.scissor.enabled) { | ||
| 272 | if (updated.scissor.enabled) { | ||
| 273 | glEnablei(GL_SCISSOR_TEST, i); | ||
| 274 | } else { | ||
| 275 | glDisablei(GL_SCISSOR_TEST, i); | ||
| 276 | } | ||
| 277 | } | ||
| 278 | |||
| 279 | if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || | ||
| 280 | updated.scissor.width != current.scissor.width || | ||
| 281 | updated.scissor.height != current.scissor.height) { | ||
| 282 | glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, | ||
| 283 | updated.scissor.height); | ||
| 284 | } | ||
| 285 | } | ||
| 286 | } else { | ||
| 287 | const auto& current = cur_state.viewports[0]; | ||
| 288 | const auto& updated = viewports[0]; | ||
| 289 | if (updated.x != current.x || updated.y != current.y || updated.width != current.width || | ||
| 290 | updated.height != current.height) { | ||
| 291 | glViewport(updated.x, updated.y, updated.width, updated.height); | ||
| 292 | } | ||
| 293 | |||
| 294 | if (updated.depth_range_near != current.depth_range_near || | ||
| 295 | updated.depth_range_far != current.depth_range_far) { | ||
| 296 | glDepthRange(updated.depth_range_near, updated.depth_range_far); | ||
| 297 | } | 345 | } |
| 298 | 346 | if (current.depth_range_near != updated.depth_range_near || | |
| 299 | if (updated.scissor.enabled != current.scissor.enabled) { | 347 | current.depth_range_far != updated.depth_range_far) { |
| 300 | if (updated.scissor.enabled) { | 348 | current.depth_range_near = updated.depth_range_near; |
| 301 | glEnable(GL_SCISSOR_TEST); | 349 | current.depth_range_far = updated.depth_range_far; |
| 302 | } else { | 350 | glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); |
| 303 | glDisable(GL_SCISSOR_TEST); | ||
| 304 | } | ||
| 305 | } | 351 | } |
| 306 | 352 | ||
| 307 | if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || | 353 | Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled); |
| 308 | updated.scissor.width != current.scissor.width || | 354 | |
| 309 | updated.scissor.height != current.scissor.height) { | 355 | if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y || |
| 310 | glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width, | 356 | current.scissor.width != updated.scissor.width || |
| 311 | updated.scissor.height); | 357 | current.scissor.height != updated.scissor.height) { |
| 358 | current.scissor.x = updated.scissor.x; | ||
| 359 | current.scissor.y = updated.scissor.y; | ||
| 360 | current.scissor.width = updated.scissor.width; | ||
| 361 | current.scissor.height = updated.scissor.height; | ||
| 362 | glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, | ||
| 363 | updated.scissor.height); | ||
| 312 | } | 364 | } |
| 313 | } | 365 | } |
| 314 | } | 366 | } |
| 315 | 367 | ||
| 316 | void OpenGLState::ApplyGlobalBlending() const { | 368 | void OpenGLState::ApplyGlobalBlending() const { |
| 317 | const Blend& current = cur_state.blend[0]; | ||
| 318 | const Blend& updated = blend[0]; | 369 | const Blend& updated = blend[0]; |
| 319 | if (updated.enabled != current.enabled) { | 370 | Blend& current = cur_state.blend[0]; |
| 320 | if (updated.enabled) { | 371 | |
| 321 | glEnable(GL_BLEND); | 372 | Enable(GL_BLEND, current.enabled, updated.enabled); |
| 322 | } else { | 373 | |
| 323 | glDisable(GL_BLEND); | 374 | if (current.src_rgb_func != updated.src_rgb_func || |
| 324 | } | 375 | current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func || |
| 325 | } | 376 | current.dst_a_func != updated.dst_a_func) { |
| 326 | if (!updated.enabled) { | 377 | current.src_rgb_func = updated.src_rgb_func; |
| 327 | return; | 378 | current.dst_rgb_func = updated.dst_rgb_func; |
| 328 | } | 379 | current.src_a_func = updated.src_a_func; |
| 329 | if (updated.src_rgb_func != current.src_rgb_func || | 380 | current.dst_a_func = updated.dst_a_func; |
| 330 | updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || | ||
| 331 | updated.dst_a_func != current.dst_a_func) { | ||
| 332 | glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, | 381 | glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, |
| 333 | updated.dst_a_func); | 382 | updated.dst_a_func); |
| 334 | } | 383 | } |
| 335 | 384 | ||
| 336 | if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { | 385 | if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) { |
| 386 | current.rgb_equation = updated.rgb_equation; | ||
| 387 | current.a_equation = updated.a_equation; | ||
| 337 | glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); | 388 | glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); |
| 338 | } | 389 | } |
| 339 | } | 390 | } |
| 340 | 391 | ||
| 341 | void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { | 392 | void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { |
| 342 | const Blend& updated = blend[target]; | 393 | const Blend& updated = blend[target]; |
| 343 | const Blend& current = cur_state.blend[target]; | 394 | Blend& current = cur_state.blend[target]; |
| 344 | if (updated.enabled != current.enabled || force) { | 395 | |
| 345 | if (updated.enabled) { | 396 | if (current.enabled != updated.enabled || force) { |
| 346 | glEnablei(GL_BLEND, static_cast<GLuint>(target)); | 397 | current.enabled = updated.enabled; |
| 347 | } else { | 398 | Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled); |
| 348 | glDisablei(GL_BLEND, static_cast<GLuint>(target)); | ||
| 349 | } | ||
| 350 | } | 399 | } |
| 351 | 400 | ||
| 352 | if (updated.src_rgb_func != current.src_rgb_func || | 401 | if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func, |
| 353 | updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || | 402 | current.dst_a_func), |
| 354 | updated.dst_a_func != current.dst_a_func) { | 403 | std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, |
| 404 | updated.dst_a_func))) { | ||
| 355 | glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, | 405 | glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, |
| 356 | updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); | 406 | updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); |
| 357 | } | 407 | } |
| 358 | 408 | ||
| 359 | if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { | 409 | if (UpdateTie(std::tie(current.rgb_equation, current.a_equation), |
| 410 | std::tie(updated.rgb_equation, updated.a_equation))) { | ||
| 360 | glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, | 411 | glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, |
| 361 | updated.a_equation); | 412 | updated.a_equation); |
| 362 | } | 413 | } |
| @@ -364,77 +415,48 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { | |||
| 364 | 415 | ||
| 365 | void OpenGLState::ApplyBlending() const { | 416 | void OpenGLState::ApplyBlending() const { |
| 366 | if (independant_blend.enabled) { | 417 | if (independant_blend.enabled) { |
| 367 | for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | 418 | const bool force = independant_blend.enabled != cur_state.independant_blend.enabled; |
| 368 | ApplyTargetBlending(i, | 419 | for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) { |
| 369 | independant_blend.enabled != cur_state.independant_blend.enabled); | 420 | ApplyTargetBlending(target, force); |
| 370 | } | 421 | } |
| 371 | } else { | 422 | } else { |
| 372 | ApplyGlobalBlending(); | 423 | ApplyGlobalBlending(); |
| 373 | } | 424 | } |
| 374 | if (blend_color.red != cur_state.blend_color.red || | 425 | cur_state.independant_blend.enabled = independant_blend.enabled; |
| 375 | blend_color.green != cur_state.blend_color.green || | 426 | |
| 376 | blend_color.blue != cur_state.blend_color.blue || | 427 | if (UpdateTie( |
| 377 | blend_color.alpha != cur_state.blend_color.alpha) { | 428 | std::tie(cur_state.blend_color.red, cur_state.blend_color.green, |
| 429 | cur_state.blend_color.blue, cur_state.blend_color.alpha), | ||
| 430 | std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) { | ||
| 378 | glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha); | 431 | glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha); |
| 379 | } | 432 | } |
| 380 | } | 433 | } |
| 381 | 434 | ||
| 382 | void OpenGLState::ApplyLogicOp() const { | 435 | void OpenGLState::ApplyLogicOp() const { |
| 383 | if (logic_op.enabled != cur_state.logic_op.enabled) { | 436 | Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled); |
| 384 | if (logic_op.enabled) { | ||
| 385 | glEnable(GL_COLOR_LOGIC_OP); | ||
| 386 | } else { | ||
| 387 | glDisable(GL_COLOR_LOGIC_OP); | ||
| 388 | } | ||
| 389 | } | ||
| 390 | 437 | ||
| 391 | if (logic_op.operation != cur_state.logic_op.operation) { | 438 | if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) { |
| 392 | glLogicOp(logic_op.operation); | 439 | glLogicOp(logic_op.operation); |
| 393 | } | 440 | } |
| 394 | } | 441 | } |
| 395 | 442 | ||
| 396 | void OpenGLState::ApplyPolygonOffset() const { | 443 | void OpenGLState::ApplyPolygonOffset() const { |
| 397 | const bool fill_enable_changed = | 444 | Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable, |
| 398 | polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; | 445 | polygon_offset.fill_enable); |
| 399 | const bool line_enable_changed = | 446 | Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable, |
| 400 | polygon_offset.line_enable != cur_state.polygon_offset.line_enable; | 447 | polygon_offset.line_enable); |
| 401 | const bool point_enable_changed = | 448 | Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable, |
| 402 | polygon_offset.point_enable != cur_state.polygon_offset.point_enable; | 449 | polygon_offset.point_enable); |
| 403 | const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor; | 450 | |
| 404 | const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units; | 451 | if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units, |
| 405 | const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp; | 452 | cur_state.polygon_offset.clamp), |
| 406 | 453 | std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) { | |
| 407 | if (fill_enable_changed) { | ||
| 408 | if (polygon_offset.fill_enable) { | ||
| 409 | glEnable(GL_POLYGON_OFFSET_FILL); | ||
| 410 | } else { | ||
| 411 | glDisable(GL_POLYGON_OFFSET_FILL); | ||
| 412 | } | ||
| 413 | } | ||
| 414 | |||
| 415 | if (line_enable_changed) { | ||
| 416 | if (polygon_offset.line_enable) { | ||
| 417 | glEnable(GL_POLYGON_OFFSET_LINE); | ||
| 418 | } else { | ||
| 419 | glDisable(GL_POLYGON_OFFSET_LINE); | ||
| 420 | } | ||
| 421 | } | ||
| 422 | |||
| 423 | if (point_enable_changed) { | ||
| 424 | if (polygon_offset.point_enable) { | ||
| 425 | glEnable(GL_POLYGON_OFFSET_POINT); | ||
| 426 | } else { | ||
| 427 | glDisable(GL_POLYGON_OFFSET_POINT); | ||
| 428 | } | ||
| 429 | } | ||
| 430 | |||
| 431 | if (factor_changed || units_changed || clamp_changed) { | ||
| 432 | if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { | 454 | if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { |
| 433 | glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); | 455 | glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); |
| 434 | } else { | 456 | } else { |
| 435 | glPolygonOffset(polygon_offset.factor, polygon_offset.units); | ||
| 436 | UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, | 457 | UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, |
| 437 | "Unimplemented Depth polygon offset clamp."); | 458 | "Unimplemented Depth polygon offset clamp."); |
| 459 | glPolygonOffset(polygon_offset.factor, polygon_offset.units); | ||
| 438 | } | 460 | } |
| 439 | } | 461 | } |
| 440 | } | 462 | } |
| @@ -443,22 +465,21 @@ void OpenGLState::ApplyTextures() const { | |||
| 443 | bool has_delta{}; | 465 | bool has_delta{}; |
| 444 | std::size_t first{}; | 466 | std::size_t first{}; |
| 445 | std::size_t last{}; | 467 | std::size_t last{}; |
| 446 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures; | 468 | std::array<GLuint, Maxwell::NumTextureSamplers> textures; |
| 447 | 469 | ||
| 448 | for (std::size_t i = 0; i < std::size(texture_units); ++i) { | 470 | for (std::size_t i = 0; i < std::size(texture_units); ++i) { |
| 449 | const auto& texture_unit = texture_units[i]; | 471 | const auto& texture_unit = texture_units[i]; |
| 450 | const auto& cur_state_texture_unit = cur_state.texture_units[i]; | 472 | auto& cur_state_texture_unit = cur_state.texture_units[i]; |
| 451 | textures[i] = texture_unit.texture; | 473 | textures[i] = texture_unit.texture; |
| 452 | 474 | if (cur_state_texture_unit.texture == textures[i]) | |
| 453 | if (textures[i] != cur_state_texture_unit.texture) { | 475 | continue; |
| 454 | if (!has_delta) { | 476 | cur_state_texture_unit.texture = textures[i]; |
| 455 | first = i; | 477 | if (!has_delta) { |
| 456 | has_delta = true; | 478 | first = i; |
| 457 | } | 479 | has_delta = true; |
| 458 | last = i; | ||
| 459 | } | 480 | } |
| 481 | last = i; | ||
| 460 | } | 482 | } |
| 461 | |||
| 462 | if (has_delta) { | 483 | if (has_delta) { |
| 463 | glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), | 484 | glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), |
| 464 | textures.data() + first); | 485 | textures.data() + first); |
| @@ -469,16 +490,18 @@ void OpenGLState::ApplySamplers() const { | |||
| 469 | bool has_delta{}; | 490 | bool has_delta{}; |
| 470 | std::size_t first{}; | 491 | std::size_t first{}; |
| 471 | std::size_t last{}; | 492 | std::size_t last{}; |
| 472 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers; | 493 | std::array<GLuint, Maxwell::NumTextureSamplers> samplers; |
| 494 | |||
| 473 | for (std::size_t i = 0; i < std::size(samplers); ++i) { | 495 | for (std::size_t i = 0; i < std::size(samplers); ++i) { |
| 496 | if (cur_state.texture_units[i].sampler == texture_units[i].sampler) | ||
| 497 | continue; | ||
| 498 | cur_state.texture_units[i].sampler = texture_units[i].sampler; | ||
| 474 | samplers[i] = texture_units[i].sampler; | 499 | samplers[i] = texture_units[i].sampler; |
| 475 | if (samplers[i] != cur_state.texture_units[i].sampler) { | 500 | if (!has_delta) { |
| 476 | if (!has_delta) { | 501 | first = i; |
| 477 | first = i; | 502 | has_delta = true; |
| 478 | has_delta = true; | ||
| 479 | } | ||
| 480 | last = i; | ||
| 481 | } | 503 | } |
| 504 | last = i; | ||
| 482 | } | 505 | } |
| 483 | if (has_delta) { | 506 | if (has_delta) { |
| 484 | glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), | 507 | glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), |
| @@ -486,81 +509,15 @@ void OpenGLState::ApplySamplers() const { | |||
| 486 | } | 509 | } |
| 487 | } | 510 | } |
| 488 | 511 | ||
| 489 | void OpenGLState::ApplyFramebufferState() const { | ||
| 490 | if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { | ||
| 491 | glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); | ||
| 492 | } | ||
| 493 | if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) { | ||
| 494 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); | ||
| 495 | } | ||
| 496 | } | ||
| 497 | |||
| 498 | void OpenGLState::ApplyVertexArrayState() const { | ||
| 499 | if (draw.vertex_array != cur_state.draw.vertex_array) { | ||
| 500 | glBindVertexArray(draw.vertex_array); | ||
| 501 | } | ||
| 502 | } | ||
| 503 | |||
| 504 | void OpenGLState::ApplyDepthClamp() const { | ||
| 505 | if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && | ||
| 506 | depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { | ||
| 507 | return; | ||
| 508 | } | ||
| 509 | UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, | ||
| 510 | "Unimplemented Depth Clamp Separation!"); | ||
| 511 | |||
| 512 | if (depth_clamp.far_plane || depth_clamp.near_plane) { | ||
| 513 | glEnable(GL_DEPTH_CLAMP); | ||
| 514 | } else { | ||
| 515 | glDisable(GL_DEPTH_CLAMP); | ||
| 516 | } | ||
| 517 | } | ||
| 518 | |||
| 519 | void OpenGLState::Apply() const { | 512 | void OpenGLState::Apply() const { |
| 520 | ApplyFramebufferState(); | 513 | ApplyFramebufferState(); |
| 521 | ApplyVertexArrayState(); | 514 | ApplyVertexArrayState(); |
| 522 | 515 | ApplyShaderProgram(); | |
| 523 | // Shader program | 516 | ApplyProgramPipeline(); |
| 524 | if (draw.shader_program != cur_state.draw.shader_program) { | 517 | ApplyClipDistances(); |
| 525 | glUseProgram(draw.shader_program); | 518 | ApplyPointSize(); |
| 526 | } | 519 | ApplyFragmentColorClamp(); |
| 527 | 520 | ApplyMultisample(); | |
| 528 | // Program pipeline | ||
| 529 | if (draw.program_pipeline != cur_state.draw.program_pipeline) { | ||
| 530 | glBindProgramPipeline(draw.program_pipeline); | ||
| 531 | } | ||
| 532 | // Clip distance | ||
| 533 | for (std::size_t i = 0; i < clip_distance.size(); ++i) { | ||
| 534 | if (clip_distance[i] != cur_state.clip_distance[i]) { | ||
| 535 | if (clip_distance[i]) { | ||
| 536 | glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); | ||
| 537 | } else { | ||
| 538 | glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); | ||
| 539 | } | ||
| 540 | } | ||
| 541 | } | ||
| 542 | // Point | ||
| 543 | if (point.size != cur_state.point.size) { | ||
| 544 | glPointSize(point.size); | ||
| 545 | } | ||
| 546 | if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) { | ||
| 547 | glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, | ||
| 548 | fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); | ||
| 549 | } | ||
| 550 | if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) { | ||
| 551 | if (multisample_control.alpha_to_coverage) { | ||
| 552 | glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE); | ||
| 553 | } else { | ||
| 554 | glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE); | ||
| 555 | } | ||
| 556 | } | ||
| 557 | if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) { | ||
| 558 | if (multisample_control.alpha_to_one) { | ||
| 559 | glEnable(GL_SAMPLE_ALPHA_TO_ONE); | ||
| 560 | } else { | ||
| 561 | glDisable(GL_SAMPLE_ALPHA_TO_ONE); | ||
| 562 | } | ||
| 563 | } | ||
| 564 | ApplyDepthClamp(); | 521 | ApplyDepthClamp(); |
| 565 | ApplyColorMask(); | 522 | ApplyColorMask(); |
| 566 | ApplyViewport(); | 523 | ApplyViewport(); |
| @@ -574,7 +531,28 @@ void OpenGLState::Apply() const { | |||
| 574 | ApplyTextures(); | 531 | ApplyTextures(); |
| 575 | ApplySamplers(); | 532 | ApplySamplers(); |
| 576 | ApplyPolygonOffset(); | 533 | ApplyPolygonOffset(); |
| 577 | cur_state = *this; | 534 | } |
| 535 | |||
| 536 | void OpenGLState::EmulateViewportWithScissor() { | ||
| 537 | auto& current = viewports[0]; | ||
| 538 | if (current.scissor.enabled) { | ||
| 539 | const GLint left = std::max(current.x, current.scissor.x); | ||
| 540 | const GLint right = | ||
| 541 | std::max(current.x + current.width, current.scissor.x + current.scissor.width); | ||
| 542 | const GLint bottom = std::max(current.y, current.scissor.y); | ||
| 543 | const GLint top = | ||
| 544 | std::max(current.y + current.height, current.scissor.y + current.scissor.height); | ||
| 545 | current.scissor.x = std::max(left, 0); | ||
| 546 | current.scissor.y = std::max(bottom, 0); | ||
| 547 | current.scissor.width = std::max(right - left, 0); | ||
| 548 | current.scissor.height = std::max(top - bottom, 0); | ||
| 549 | } else { | ||
| 550 | current.scissor.enabled = true; | ||
| 551 | current.scissor.x = current.x; | ||
| 552 | current.scissor.y = current.y; | ||
| 553 | current.scissor.width = current.width; | ||
| 554 | current.scissor.height = current.height; | ||
| 555 | } | ||
| 578 | } | 556 | } |
| 579 | 557 | ||
| 580 | OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { | 558 | OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { |
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 9e1eda5b1..41418a7b8 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -54,10 +54,6 @@ public: | |||
| 54 | } depth_clamp; // GL_DEPTH_CLAMP | 54 | } depth_clamp; // GL_DEPTH_CLAMP |
| 55 | 55 | ||
| 56 | struct { | 56 | struct { |
| 57 | bool enabled; // viewports arrays are only supported when geometry shaders are enabled. | ||
| 58 | } geometry_shaders; | ||
| 59 | |||
| 60 | struct { | ||
| 61 | bool enabled; // GL_CULL_FACE | 57 | bool enabled; // GL_CULL_FACE |
| 62 | GLenum mode; // GL_CULL_FACE_MODE | 58 | GLenum mode; // GL_CULL_FACE_MODE |
| 63 | GLenum front_face; // GL_FRONT_FACE | 59 | GLenum front_face; // GL_FRONT_FACE |
| @@ -184,34 +180,26 @@ public: | |||
| 184 | static OpenGLState GetCurState() { | 180 | static OpenGLState GetCurState() { |
| 185 | return cur_state; | 181 | return cur_state; |
| 186 | } | 182 | } |
| 183 | |||
| 187 | static bool GetsRGBUsed() { | 184 | static bool GetsRGBUsed() { |
| 188 | return s_rgb_used; | 185 | return s_rgb_used; |
| 189 | } | 186 | } |
| 187 | |||
| 190 | static void ClearsRGBUsed() { | 188 | static void ClearsRGBUsed() { |
| 191 | s_rgb_used = false; | 189 | s_rgb_used = false; |
| 192 | } | 190 | } |
| 191 | |||
| 193 | /// Apply this state as the current OpenGL state | 192 | /// Apply this state as the current OpenGL state |
| 194 | void Apply() const; | 193 | void Apply() const; |
| 195 | /// Apply only the state affecting the framebuffer | 194 | |
| 196 | void ApplyFramebufferState() const; | 195 | void ApplyFramebufferState() const; |
| 197 | /// Apply only the state affecting the vertex array | ||
| 198 | void ApplyVertexArrayState() const; | 196 | void ApplyVertexArrayState() const; |
| 199 | /// Set the initial OpenGL state | 197 | void ApplyShaderProgram() const; |
| 200 | static void ApplyDefaultState(); | 198 | void ApplyProgramPipeline() const; |
| 201 | /// Resets any references to the given resource | 199 | void ApplyClipDistances() const; |
| 202 | OpenGLState& UnbindTexture(GLuint handle); | 200 | void ApplyPointSize() const; |
| 203 | OpenGLState& ResetSampler(GLuint handle); | 201 | void ApplyFragmentColorClamp() const; |
| 204 | OpenGLState& ResetProgram(GLuint handle); | 202 | void ApplyMultisample() const; |
| 205 | OpenGLState& ResetPipeline(GLuint handle); | ||
| 206 | OpenGLState& ResetVertexArray(GLuint handle); | ||
| 207 | OpenGLState& ResetFramebuffer(GLuint handle); | ||
| 208 | void EmulateViewportWithScissor(); | ||
| 209 | |||
| 210 | private: | ||
| 211 | static OpenGLState cur_state; | ||
| 212 | // Workaround for sRGB problems caused by | ||
| 213 | // QT not supporting srgb output | ||
| 214 | static bool s_rgb_used; | ||
| 215 | void ApplySRgb() const; | 203 | void ApplySRgb() const; |
| 216 | void ApplyCulling() const; | 204 | void ApplyCulling() const; |
| 217 | void ApplyColorMask() const; | 205 | void ApplyColorMask() const; |
| @@ -227,6 +215,26 @@ private: | |||
| 227 | void ApplySamplers() const; | 215 | void ApplySamplers() const; |
| 228 | void ApplyDepthClamp() const; | 216 | void ApplyDepthClamp() const; |
| 229 | void ApplyPolygonOffset() const; | 217 | void ApplyPolygonOffset() const; |
| 218 | |||
| 219 | /// Set the initial OpenGL state | ||
| 220 | static void ApplyDefaultState(); | ||
| 221 | |||
| 222 | /// Resets any references to the given resource | ||
| 223 | OpenGLState& UnbindTexture(GLuint handle); | ||
| 224 | OpenGLState& ResetSampler(GLuint handle); | ||
| 225 | OpenGLState& ResetProgram(GLuint handle); | ||
| 226 | OpenGLState& ResetPipeline(GLuint handle); | ||
| 227 | OpenGLState& ResetVertexArray(GLuint handle); | ||
| 228 | OpenGLState& ResetFramebuffer(GLuint handle); | ||
| 229 | |||
| 230 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test | ||
| 231 | void EmulateViewportWithScissor(); | ||
| 232 | |||
| 233 | private: | ||
| 234 | static OpenGLState cur_state; | ||
| 235 | |||
| 236 | // Workaround for sRGB problems caused by QT not supporting srgb output | ||
| 237 | static bool s_rgb_used; | ||
| 230 | }; | 238 | }; |
| 231 | 239 | ||
| 232 | } // namespace OpenGL | 240 | } // namespace OpenGL |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index a99ae19bf..a775b402b 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -7,7 +7,9 @@ | |||
| 7 | #include <fmt/format.h> | 7 | #include <fmt/format.h> |
| 8 | 8 | ||
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/bit_field.h" | ||
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/logging/log.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | 13 | #include "video_core/engines/shader_bytecode.h" |
| 12 | #include "video_core/shader/shader_ir.h" | 14 | #include "video_core/shader/shader_ir.h" |
| 13 | 15 | ||
| @@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 41 | 43 | ||
| 42 | switch (opcode->get().GetId()) { | 44 | switch (opcode->get().GetId()) { |
| 43 | case OpCode::Id::TEX: { | 45 | case OpCode::Id::TEX: { |
| 44 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 45 | "AOFFI is not implemented"); | ||
| 46 | |||
| 47 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | 46 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { |
| 48 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | 47 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); |
| 49 | } | 48 | } |
| 50 | 49 | ||
| 51 | const TextureType texture_type{instr.tex.texture_type}; | 50 | const TextureType texture_type{instr.tex.texture_type}; |
| 52 | const bool is_array = instr.tex.array != 0; | 51 | const bool is_array = instr.tex.array != 0; |
| 52 | const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 53 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | 53 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); |
| 54 | const auto process_mode = instr.tex.GetTextureProcessMode(); | 54 | const auto process_mode = instr.tex.GetTextureProcessMode(); |
| 55 | WriteTexInstructionFloat( | 55 | WriteTexInstructionFloat( |
| 56 | bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); | 56 | bb, instr, |
| 57 | GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); | ||
| 57 | break; | 58 | break; |
| 58 | } | 59 | } |
| 59 | case OpCode::Id::TEXS: { | 60 | case OpCode::Id::TEXS: { |
| @@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 78 | } | 79 | } |
| 79 | case OpCode::Id::TLD4: { | 80 | case OpCode::Id::TLD4: { |
| 80 | ASSERT(instr.tld4.array == 0); | 81 | ASSERT(instr.tld4.array == 0); |
| 81 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 82 | "AOFFI is not implemented"); | ||
| 83 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | 82 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), |
| 84 | "NDV is not implemented"); | 83 | "NDV is not implemented"); |
| 85 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), | 84 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), |
| @@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 92 | const auto texture_type = instr.tld4.texture_type.Value(); | 91 | const auto texture_type = instr.tld4.texture_type.Value(); |
| 93 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | 92 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); |
| 94 | const bool is_array = instr.tld4.array != 0; | 93 | const bool is_array = instr.tld4.array != 0; |
| 95 | WriteTexInstructionFloat(bb, instr, | 94 | const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); |
| 96 | GetTld4Code(instr, texture_type, depth_compare, is_array)); | 95 | WriteTexInstructionFloat( |
| 96 | bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi)); | ||
| 97 | break; | 97 | break; |
| 98 | } | 98 | } |
| 99 | case OpCode::Id::TLD4S: { | 99 | case OpCode::Id::TLD4S: { |
| @@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 127 | Node4 values; | 127 | Node4 values; |
| 128 | for (u32 element = 0; element < values.size(); ++element) { | 128 | for (u32 element = 0; element < values.size(); ++element) { |
| 129 | auto coords_copy = coords; | 129 | auto coords_copy = coords; |
| 130 | MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; | 130 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element}; |
| 131 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | 131 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 132 | } | 132 | } |
| 133 | 133 | ||
| @@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 152 | if (!instr.txq.IsComponentEnabled(element)) { | 152 | if (!instr.txq.IsComponentEnabled(element)) { |
| 153 | continue; | 153 | continue; |
| 154 | } | 154 | } |
| 155 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; | 155 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; |
| 156 | const Node value = | 156 | const Node value = |
| 157 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | 157 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); |
| 158 | SetTemporal(bb, indexer++, value); | 158 | SetTemporal(bb, indexer++, value); |
| @@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 202 | 202 | ||
| 203 | for (u32 element = 0; element < 2; ++element) { | 203 | for (u32 element = 0; element < 2; ++element) { |
| 204 | auto params = coords; | 204 | auto params = coords; |
| 205 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; | 205 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; |
| 206 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | 206 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 207 | SetTemporal(bb, element, value); | 207 | SetTemporal(bb, element, value); |
| 208 | } | 208 | } |
| @@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||
| 325 | 325 | ||
| 326 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 326 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
| 327 | TextureProcessMode process_mode, std::vector<Node> coords, | 327 | TextureProcessMode process_mode, std::vector<Node> coords, |
| 328 | Node array, Node depth_compare, u32 bias_offset) { | 328 | Node array, Node depth_compare, u32 bias_offset, |
| 329 | std::vector<Node> aoffi) { | ||
| 329 | const bool is_array = array; | 330 | const bool is_array = array; |
| 330 | const bool is_shadow = depth_compare; | 331 | const bool is_shadow = depth_compare; |
| 331 | 332 | ||
| @@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 374 | Node4 values; | 375 | Node4 values; |
| 375 | for (u32 element = 0; element < values.size(); ++element) { | 376 | for (u32 element = 0; element < values.size(); ++element) { |
| 376 | auto copy_coords = coords; | 377 | auto copy_coords = coords; |
| 377 | MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; | 378 | MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element}; |
| 378 | values[element] = Operation(read_method, meta, std::move(copy_coords)); | 379 | values[element] = Operation(read_method, meta, std::move(copy_coords)); |
| 379 | } | 380 | } |
| 380 | 381 | ||
| @@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 382 | } | 383 | } |
| 383 | 384 | ||
| 384 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | 385 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, |
| 385 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | 386 | TextureProcessMode process_mode, bool depth_compare, bool is_array, |
| 386 | const bool lod_bias_enabled = | 387 | bool is_aoffi) { |
| 387 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | 388 | const bool lod_bias_enabled{ |
| 389 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; | ||
| 390 | |||
| 391 | u64 parameter_register = instr.gpr20.Value(); | ||
| 392 | if (lod_bias_enabled) { | ||
| 393 | ++parameter_register; | ||
| 394 | } | ||
| 388 | 395 | ||
| 389 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | 396 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( |
| 390 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | 397 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); |
| @@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | |||
| 404 | 411 | ||
| 405 | const Node array = is_array ? GetRegister(array_register) : nullptr; | 412 | const Node array = is_array ? GetRegister(array_register) : nullptr; |
| 406 | 413 | ||
| 414 | std::vector<Node> aoffi; | ||
| 415 | if (is_aoffi) { | ||
| 416 | aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); | ||
| 417 | } | ||
| 418 | |||
| 407 | Node dc{}; | 419 | Node dc{}; |
| 408 | if (depth_compare) { | 420 | if (depth_compare) { |
| 409 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | 421 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod |
| 410 | // or bias are used | 422 | // or bias are used |
| 411 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | 423 | dc = GetRegister(parameter_register++); |
| 412 | dc = GetRegister(depth_register); | ||
| 413 | } | 424 | } |
| 414 | 425 | ||
| 415 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); | 426 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi); |
| 416 | } | 427 | } |
| 417 | 428 | ||
| 418 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | 429 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, |
| @@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 448 | dc = GetRegister(depth_register); | 459 | dc = GetRegister(depth_register); |
| 449 | } | 460 | } |
| 450 | 461 | ||
| 451 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); | 462 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}); |
| 452 | } | 463 | } |
| 453 | 464 | ||
| 454 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | 465 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, |
| 455 | bool is_array) { | 466 | bool is_array, bool is_aoffi) { |
| 456 | const std::size_t coord_count = GetCoordCount(texture_type); | 467 | const std::size_t coord_count = GetCoordCount(texture_type); |
| 457 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | 468 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); |
| 458 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | 469 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); |
| @@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 463 | const u64 coord_register = array_register + (is_array ? 1 : 0); | 474 | const u64 coord_register = array_register + (is_array ? 1 : 0); |
| 464 | 475 | ||
| 465 | std::vector<Node> coords; | 476 | std::vector<Node> coords; |
| 466 | for (size_t i = 0; i < coord_count; ++i) | 477 | for (std::size_t i = 0; i < coord_count; ++i) { |
| 467 | coords.push_back(GetRegister(coord_register + i)); | 478 | coords.push_back(GetRegister(coord_register + i)); |
| 479 | } | ||
| 480 | |||
| 481 | u64 parameter_register = instr.gpr20.Value(); | ||
| 482 | std::vector<Node> aoffi; | ||
| 483 | if (is_aoffi) { | ||
| 484 | aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); | ||
| 485 | } | ||
| 486 | |||
| 487 | Node dc{}; | ||
| 488 | if (depth_compare) { | ||
| 489 | dc = GetRegister(parameter_register++); | ||
| 490 | } | ||
| 468 | 491 | ||
| 469 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 492 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); |
| 470 | 493 | ||
| 471 | Node4 values; | 494 | Node4 values; |
| 472 | for (u32 element = 0; element < values.size(); ++element) { | 495 | for (u32 element = 0; element < values.size(); ++element) { |
| 473 | auto coords_copy = coords; | 496 | auto coords_copy = coords; |
| 474 | MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; | 497 | MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element}; |
| 475 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | 498 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 476 | } | 499 | } |
| 477 | 500 | ||
| @@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 507 | Node4 values; | 530 | Node4 values; |
| 508 | for (u32 element = 0; element < values.size(); ++element) { | 531 | for (u32 element = 0; element < values.size(); ++element) { |
| 509 | auto coords_copy = coords; | 532 | auto coords_copy = coords; |
| 510 | MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; | 533 | MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element}; |
| 511 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | 534 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |
| 512 | } | 535 | } |
| 513 | return values; | 536 | return values; |
| @@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | |||
| 531 | return {coord_count, total_coord_count}; | 554 | return {coord_count, total_coord_count}; |
| 532 | } | 555 | } |
| 533 | 556 | ||
| 534 | } // namespace VideoCommon::Shader \ No newline at end of file | 557 | std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, |
| 558 | bool is_tld4) { | ||
| 559 | const auto [coord_offsets, size, wrap_value, | ||
| 560 | diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { | ||
| 561 | if (is_tld4) { | ||
| 562 | return {{0, 8, 16}, 6, 32, 64}; | ||
| 563 | } else { | ||
| 564 | return {{0, 4, 8}, 4, 8, 16}; | ||
| 565 | } | ||
| 566 | }(); | ||
| 567 | const u32 mask = (1U << size) - 1; | ||
| 568 | |||
| 569 | std::vector<Node> aoffi; | ||
| 570 | aoffi.reserve(coord_count); | ||
| 571 | |||
| 572 | const auto aoffi_immediate{ | ||
| 573 | TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; | ||
| 574 | if (!aoffi_immediate) { | ||
| 575 | // Variable access, not supported on AMD. | ||
| 576 | LOG_WARNING(HW_GPU, | ||
| 577 | "AOFFI constant folding failed, some hardware might have graphical issues"); | ||
| 578 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 579 | const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); | ||
| 580 | const Node condition = | ||
| 581 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); | ||
| 582 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); | ||
| 583 | aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); | ||
| 584 | } | ||
| 585 | return aoffi; | ||
| 586 | } | ||
| 587 | |||
| 588 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 589 | s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; | ||
| 590 | if (value >= wrap_value) { | ||
| 591 | value -= diff_value; | ||
| 592 | } | ||
| 593 | aoffi.push_back(Immediate(value)); | ||
| 594 | } | ||
| 595 | return aoffi; | ||
| 596 | } | ||
| 597 | |||
| 598 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 5bc3a3900..4888998d3 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstring> | 8 | #include <cstring> |
| 9 | #include <map> | 9 | #include <map> |
| 10 | #include <optional> | ||
| 10 | #include <set> | 11 | #include <set> |
| 11 | #include <string> | 12 | #include <string> |
| 12 | #include <tuple> | 13 | #include <tuple> |
| @@ -290,6 +291,7 @@ struct MetaTexture { | |||
| 290 | const Sampler& sampler; | 291 | const Sampler& sampler; |
| 291 | Node array{}; | 292 | Node array{}; |
| 292 | Node depth_compare{}; | 293 | Node depth_compare{}; |
| 294 | std::vector<Node> aoffi; | ||
| 293 | Node bias{}; | 295 | Node bias{}; |
| 294 | Node lod{}; | 296 | Node lod{}; |
| 295 | Node component{}; | 297 | Node component{}; |
| @@ -741,14 +743,14 @@ private: | |||
| 741 | 743 | ||
| 742 | Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 744 | Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 743 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 745 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
| 744 | bool is_array); | 746 | bool is_array, bool is_aoffi); |
| 745 | 747 | ||
| 746 | Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 748 | Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 747 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 749 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
| 748 | bool is_array); | 750 | bool is_array); |
| 749 | 751 | ||
| 750 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 752 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 751 | bool depth_compare, bool is_array); | 753 | bool depth_compare, bool is_array, bool is_aoffi); |
| 752 | 754 | ||
| 753 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 755 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 754 | bool is_array); | 756 | bool is_array); |
| @@ -757,9 +759,11 @@ private: | |||
| 757 | Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, | 759 | Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, |
| 758 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | 760 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); |
| 759 | 761 | ||
| 762 | std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); | ||
| 763 | |||
| 760 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 764 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 761 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, | 765 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, |
| 762 | Node array, Node depth_compare, u32 bias_offset); | 766 | Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi); |
| 763 | 767 | ||
| 764 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | 768 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, |
| 765 | u64 byte_height); | 769 | u64 byte_height); |
| @@ -773,6 +777,8 @@ private: | |||
| 773 | 777 | ||
| 774 | Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); | 778 | Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); |
| 775 | 779 | ||
| 780 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor); | ||
| 781 | |||
| 776 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); | 782 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); |
| 777 | 783 | ||
| 778 | template <typename... T> | 784 | template <typename... T> |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 33b071747..4505667ff 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <utility> | 6 | #include <utility> |
| 7 | #include <variant> | 7 | #include <variant> |
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | 10 | #include "video_core/shader/shader_ir.h" |
| 10 | 11 | ||
| 11 | namespace VideoCommon::Shader { | 12 | namespace VideoCommon::Shader { |
| @@ -14,7 +15,7 @@ namespace { | |||
| 14 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, |
| 15 | OperationCode operation_code) { | 16 | OperationCode operation_code) { |
| 16 | for (; cursor >= 0; --cursor) { | 17 | for (; cursor >= 0; --cursor) { |
| 17 | const Node node = code[cursor]; | 18 | const Node node = code.at(cursor); |
| 18 | if (const auto operation = std::get_if<OperationNode>(node)) { | 19 | if (const auto operation = std::get_if<OperationNode>(node)) { |
| 19 | if (operation->GetCode() == operation_code) | 20 | if (operation->GetCode() == operation_code) |
| 20 | return {node, cursor}; | 21 | return {node, cursor}; |
| @@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { | |||
| 64 | return nullptr; | 65 | return nullptr; |
| 65 | } | 66 | } |
| 66 | 67 | ||
| 68 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) { | ||
| 69 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register | ||
| 70 | // that it uses as operand | ||
| 71 | const auto [found, found_cursor] = | ||
| 72 | TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); | ||
| 73 | if (!found) { | ||
| 74 | return {}; | ||
| 75 | } | ||
| 76 | if (const auto immediate = std::get_if<ImmediateNode>(found)) { | ||
| 77 | return immediate->GetValue(); | ||
| 78 | } | ||
| 79 | return {}; | ||
| 80 | } | ||
| 81 | |||
| 67 | std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, | 82 | std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, |
| 68 | s64 cursor) { | 83 | s64 cursor) { |
| 69 | for (; cursor >= 0; --cursor) { | 84 | for (; cursor >= 0; --cursor) { |