summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitmodules3
-rw-r--r--externals/CMakeLists.txt4
m---------externals/zstd0
-rw-r--r--src/common/CMakeLists.txt4
-rw-r--r--src/common/zstd_compression.cpp53
-rw-r--r--src/common/zstd_compression.h42
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp2
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.h6
-rw-r--r--src/core/hle/ipc_helpers.h4
-rw-r--r--src/core/hle/kernel/client_port.cpp10
-rw-r--r--src/core/hle/kernel/kernel.cpp2
-rw-r--r--src/core/hle/kernel/process.cpp3
-rw-r--r--src/core/hle/kernel/scheduler.cpp1
-rw-r--r--src/core/hle/kernel/server_port.cpp7
-rw-r--r--src/core/hle/kernel/server_port.h7
-rw-r--r--src/core/hle/kernel/server_session.cpp2
-rw-r--r--src/core/hle/kernel/server_session.h3
-rw-r--r--src/core/memory.cpp4
-rw-r--r--src/core/memory.h13
-rw-r--r--src/tests/core/arm/arm_test_common.cpp3
-rw-r--r--src/video_core/gpu.cpp2
-rw-r--r--src/video_core/memory_manager.cpp87
-rw-r--r--src/video_core/memory_manager.h7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp133
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h1
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp610
-rw-r--r--src/video_core/renderer_opengl/gl_state.h52
-rw-r--r--src/video_core/shader/decode/texture.cpp114
-rw-r--r--src/video_core/shader/shader_ir.h12
-rw-r--r--src/video_core/shader/track.cpp17
31 files changed, 743 insertions, 477 deletions
diff --git a/.gitmodules b/.gitmodules
index 2558a5ebc..26b4e5272 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -40,3 +40,6 @@
40[submodule "Vulkan-Headers"] 40[submodule "Vulkan-Headers"]
41 path = externals/Vulkan-Headers 41 path = externals/Vulkan-Headers
42 url = https://github.com/KhronosGroup/Vulkan-Headers.git 42 url = https://github.com/KhronosGroup/Vulkan-Headers.git
43[submodule "externals/zstd"]
44 path = externals/zstd
45 url = https://github.com/facebook/zstd
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index e156bbece..aa3319eb1 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -49,6 +49,10 @@ add_subdirectory(open_source_archives EXCLUDE_FROM_ALL)
49add_library(unicorn-headers INTERFACE) 49add_library(unicorn-headers INTERFACE)
50target_include_directories(unicorn-headers INTERFACE ./unicorn/include) 50target_include_directories(unicorn-headers INTERFACE ./unicorn/include)
51 51
52# Zstandard
53add_subdirectory(zstd/build/cmake EXCLUDE_FROM_ALL)
54target_include_directories(libzstd_static INTERFACE ./zstd/lib)
55
52# SoundTouch 56# SoundTouch
53add_subdirectory(soundtouch) 57add_subdirectory(soundtouch)
54 58
diff --git a/externals/zstd b/externals/zstd
new file mode 160000
Subproject 470344d33e1d52a2ada75d278466da8d4ee2faf
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 5639021d3..1e8e1b215 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -125,6 +125,8 @@ add_library(common STATIC
125 uint128.h 125 uint128.h
126 vector_math.h 126 vector_math.h
127 web_result.h 127 web_result.h
128 zstd_compression.cpp
129 zstd_compression.h
128) 130)
129 131
130if(ARCHITECTURE_x86_64) 132if(ARCHITECTURE_x86_64)
@@ -138,4 +140,4 @@ endif()
138create_target_directory_groups(common) 140create_target_directory_groups(common)
139 141
140target_link_libraries(common PUBLIC Boost::boost fmt microprofile) 142target_link_libraries(common PUBLIC Boost::boost fmt microprofile)
141target_link_libraries(common PRIVATE lz4_static) 143target_link_libraries(common PRIVATE lz4_static libzstd_static)
diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp
new file mode 100644
index 000000000..60a35c67c
--- /dev/null
+++ b/src/common/zstd_compression.cpp
@@ -0,0 +1,53 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <zstd.h>
9
10#include "common/assert.h"
11#include "common/zstd_compression.h"
12
13namespace Common::Compression {
14
15std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level) {
16 compression_level = std::clamp(compression_level, 1, ZSTD_maxCLevel());
17
18 const std::size_t max_compressed_size = ZSTD_compressBound(source_size);
19 std::vector<u8> compressed(max_compressed_size);
20
21 const std::size_t compressed_size =
22 ZSTD_compress(compressed.data(), compressed.size(), source, source_size, compression_level);
23
24 if (ZSTD_isError(compressed_size)) {
25 // Compression failed
26 return {};
27 }
28
29 compressed.resize(compressed_size);
30
31 return compressed;
32}
33
34std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size) {
35 return CompressDataZSTD(source, source_size, ZSTD_CLEVEL_DEFAULT);
36}
37
38std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed) {
39 const std::size_t decompressed_size =
40 ZSTD_getDecompressedSize(compressed.data(), compressed.size());
41 std::vector<u8> decompressed(decompressed_size);
42
43 const std::size_t uncompressed_result_size = ZSTD_decompress(
44 decompressed.data(), decompressed.size(), compressed.data(), compressed.size());
45
46 if (decompressed_size != uncompressed_result_size || ZSTD_isError(uncompressed_result_size)) {
47 // Decompression failed
48 return {};
49 }
50 return decompressed;
51}
52
53} // namespace Common::Compression
diff --git a/src/common/zstd_compression.h b/src/common/zstd_compression.h
new file mode 100644
index 000000000..e0a64b035
--- /dev/null
+++ b/src/common/zstd_compression.h
@@ -0,0 +1,42 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "common/common_types.h"
8
9namespace Common::Compression {
10
11/**
12 * Compresses a source memory region with Zstandard and returns the compressed data in a vector.
13 *
14 * @param source the uncompressed source memory region.
15 * @param source_size the size in bytes of the uncompressed source memory region.
16 * @param compression_level the used compression level. Should be between 1 and 22.
17 *
18 * @return the compressed data.
19 */
20std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level);
21
22/**
23 * Compresses a source memory region with Zstandard with the default compression level and returns
24 * the compressed data in a vector.
25 *
26 * @param source the uncompressed source memory region.
27 * @param source_size the size in bytes of the uncompressed source memory region.
28 *
29 * @return the compressed data.
30 */
31std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size);
32
33/**
34 * Decompresses a source memory region with Zstandard and returns the uncompressed data in a vector.
35 *
36 * @param compressed the compressed source memory region.
37 *
38 * @return the decompressed data.
39 */
40std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed);
41
42} // namespace Common::Compression \ No newline at end of file
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index f64e4c6a6..49145911b 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -163,7 +163,6 @@ MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)
163 163
164void ARM_Dynarmic::Run() { 164void ARM_Dynarmic::Run() {
165 MICROPROFILE_SCOPE(ARM_Jit_Dynarmic); 165 MICROPROFILE_SCOPE(ARM_Jit_Dynarmic);
166 ASSERT(Memory::GetCurrentPageTable() == current_page_table);
167 166
168 jit->Run(); 167 jit->Run();
169} 168}
@@ -278,7 +277,6 @@ void ARM_Dynarmic::ClearExclusiveState() {
278 277
279void ARM_Dynarmic::PageTableChanged() { 278void ARM_Dynarmic::PageTableChanged() {
280 jit = MakeJit(); 279 jit = MakeJit();
281 current_page_table = Memory::GetCurrentPageTable();
282} 280}
283 281
284DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {} 282DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {}
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 81e0b4ac0..d867c2a50 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,10 +12,6 @@
12#include "core/arm/exclusive_monitor.h" 12#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 13#include "core/arm/unicorn/arm_unicorn.h"
14 14
15namespace Common {
16struct PageTable;
17}
18
19namespace Core::Timing { 15namespace Core::Timing {
20class CoreTiming; 16class CoreTiming;
21} 17}
@@ -69,8 +65,6 @@ private:
69 std::size_t core_index; 65 std::size_t core_index;
70 Timing::CoreTiming& core_timing; 66 Timing::CoreTiming& core_timing;
71 DynarmicExclusiveMonitor& exclusive_monitor; 67 DynarmicExclusiveMonitor& exclusive_monitor;
72
73 Common::PageTable* current_page_table = nullptr;
74}; 68};
75 69
76class DynarmicExclusiveMonitor final : public ExclusiveMonitor { 70class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 68406eb63..ac0e1d796 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -139,10 +139,8 @@ public:
139 context->AddDomainObject(std::move(iface)); 139 context->AddDomainObject(std::move(iface));
140 } else { 140 } else {
141 auto& kernel = Core::System::GetInstance().Kernel(); 141 auto& kernel = Core::System::GetInstance().Kernel();
142 auto sessions = 142 auto [server, client] =
143 Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName()); 143 Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName());
144 auto server = std::get<Kernel::SharedPtr<Kernel::ServerSession>>(sessions);
145 auto client = std::get<Kernel::SharedPtr<Kernel::ClientSession>>(sessions);
146 iface->ClientConnected(server); 144 iface->ClientConnected(server);
147 context->AddMoveObject(std::move(client)); 145 context->AddMoveObject(std::move(client));
148 } 146 }
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index aa432658e..744b1697d 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -2,8 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <tuple>
6
7#include "core/hle/kernel/client_port.h" 5#include "core/hle/kernel/client_port.h"
8#include "core/hle/kernel/client_session.h" 6#include "core/hle/kernel/client_session.h"
9#include "core/hle/kernel/errors.h" 7#include "core/hle/kernel/errors.h"
@@ -31,18 +29,18 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
31 active_sessions++; 29 active_sessions++;
32 30
33 // Create a new session pair, let the created sessions inherit the parent port's HLE handler. 31 // Create a new session pair, let the created sessions inherit the parent port's HLE handler.
34 auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this); 32 auto [server, client] = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
35 33
36 if (server_port->HasHLEHandler()) { 34 if (server_port->HasHLEHandler()) {
37 server_port->GetHLEHandler()->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions)); 35 server_port->GetHLEHandler()->ClientConnected(server);
38 } else { 36 } else {
39 server_port->AppendPendingSession(std::get<SharedPtr<ServerSession>>(sessions)); 37 server_port->AppendPendingSession(server);
40 } 38 }
41 39
42 // Wake the threads waiting on the ServerPort 40 // Wake the threads waiting on the ServerPort
43 server_port->WakeupAllWaitingThreads(); 41 server_port->WakeupAllWaitingThreads();
44 42
45 return MakeResult(std::get<SharedPtr<ClientSession>>(sessions)); 43 return MakeResult(client);
46} 44}
47 45
48void ClientPort::ConnectionClosed() { 46void ClientPort::ConnectionClosed() {
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 3f14bfa86..4d58e7c69 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -21,6 +21,7 @@
21#include "core/hle/kernel/thread.h" 21#include "core/hle/kernel/thread.h"
22#include "core/hle/lock.h" 22#include "core/hle/lock.h"
23#include "core/hle/result.h" 23#include "core/hle/result.h"
24#include "core/memory.h"
24 25
25namespace Kernel { 26namespace Kernel {
26 27
@@ -181,6 +182,7 @@ void KernelCore::AppendNewProcess(SharedPtr<Process> process) {
181 182
182void KernelCore::MakeCurrentProcess(Process* process) { 183void KernelCore::MakeCurrentProcess(Process* process) {
183 impl->current_process = process; 184 impl->current_process = process;
185 Memory::SetCurrentPageTable(&process->VMManager().page_table);
184} 186}
185 187
186Process* KernelCore::CurrentProcess() { 188Process* KernelCore::CurrentProcess() {
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 041267318..26c6b95ab 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -32,9 +32,6 @@ namespace {
32 * @param priority The priority to give the main thread 32 * @param priority The priority to give the main thread
33 */ 33 */
34void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) { 34void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
35 // Setup page table so we can write to memory
36 Memory::SetCurrentPageTable(&owner_process.VMManager().page_table);
37
38 // Initialize new "main" thread 35 // Initialize new "main" thread
39 const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress(); 36 const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
40 auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, 37 auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0,
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index ac501bf7f..e8447b69a 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -101,7 +101,6 @@ void Scheduler::SwitchContext(Thread* new_thread) {
101 auto* const thread_owner_process = current_thread->GetOwnerProcess(); 101 auto* const thread_owner_process = current_thread->GetOwnerProcess();
102 if (previous_process != thread_owner_process) { 102 if (previous_process != thread_owner_process) {
103 system.Kernel().MakeCurrentProcess(thread_owner_process); 103 system.Kernel().MakeCurrentProcess(thread_owner_process);
104 Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
105 } 104 }
106 105
107 cpu_core.LoadContext(new_thread->GetContext()); 106 cpu_core.LoadContext(new_thread->GetContext());
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index 708fdf9e1..02e7c60e6 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -39,9 +39,8 @@ void ServerPort::Acquire(Thread* thread) {
39 ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); 39 ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
40} 40}
41 41
42std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortPair( 42ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions,
43 KernelCore& kernel, u32 max_sessions, std::string name) { 43 std::string name) {
44
45 SharedPtr<ServerPort> server_port(new ServerPort(kernel)); 44 SharedPtr<ServerPort> server_port(new ServerPort(kernel));
46 SharedPtr<ClientPort> client_port(new ClientPort(kernel)); 45 SharedPtr<ClientPort> client_port(new ClientPort(kernel));
47 46
@@ -51,7 +50,7 @@ std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortP
51 client_port->max_sessions = max_sessions; 50 client_port->max_sessions = max_sessions;
52 client_port->active_sessions = 0; 51 client_port->active_sessions = 0;
53 52
54 return std::make_tuple(std::move(server_port), std::move(client_port)); 53 return std::make_pair(std::move(server_port), std::move(client_port));
55} 54}
56 55
57} // namespace Kernel 56} // namespace Kernel
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index 76293cb8b..fef573b71 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -6,7 +6,7 @@
6 6
7#include <memory> 7#include <memory>
8#include <string> 8#include <string>
9#include <tuple> 9#include <utility>
10#include <vector> 10#include <vector>
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "core/hle/kernel/object.h" 12#include "core/hle/kernel/object.h"
@@ -23,6 +23,7 @@ class SessionRequestHandler;
23class ServerPort final : public WaitObject { 23class ServerPort final : public WaitObject {
24public: 24public:
25 using HLEHandler = std::shared_ptr<SessionRequestHandler>; 25 using HLEHandler = std::shared_ptr<SessionRequestHandler>;
26 using PortPair = std::pair<SharedPtr<ServerPort>, SharedPtr<ClientPort>>;
26 27
27 /** 28 /**
28 * Creates a pair of ServerPort and an associated ClientPort. 29 * Creates a pair of ServerPort and an associated ClientPort.
@@ -32,8 +33,8 @@ public:
32 * @param name Optional name of the ports 33 * @param name Optional name of the ports
33 * @return The created port tuple 34 * @return The created port pair
34 */ 35 */
35 static std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> CreatePortPair( 36 static PortPair CreatePortPair(KernelCore& kernel, u32 max_sessions,
36 KernelCore& kernel, u32 max_sessions, std::string name = "UnknownPort"); 37 std::string name = "UnknownPort");
37 38
38 std::string GetTypeName() const override { 39 std::string GetTypeName() const override {
39 return "ServerPort"; 40 return "ServerPort";
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 40cec143e..a6b2cf06a 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -204,6 +204,6 @@ ServerSession::SessionPair ServerSession::CreateSessionPair(KernelCore& kernel,
204 client_session->parent = parent; 204 client_session->parent = parent;
205 server_session->parent = parent; 205 server_session->parent = parent;
206 206
207 return std::make_tuple(std::move(server_session), std::move(client_session)); 207 return std::make_pair(std::move(server_session), std::move(client_session));
208} 208}
209} // namespace Kernel 209} // namespace Kernel
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index 3429a326f..09b835ff8 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -6,6 +6,7 @@
6 6
7#include <memory> 7#include <memory>
8#include <string> 8#include <string>
9#include <utility>
9#include <vector> 10#include <vector>
10 11
11#include "core/hle/kernel/object.h" 12#include "core/hle/kernel/object.h"
@@ -58,7 +59,7 @@ public:
58 return parent.get(); 59 return parent.get();
59 } 60 }
60 61
61 using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; 62 using SessionPair = std::pair<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
62 63
63 /** 64 /**
64 * Creates a pair of ServerSession and an associated ClientSession. 65 * Creates a pair of ServerSession and an associated ClientSession.
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 332c1037c..4e0538bc2 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -38,10 +38,6 @@ void SetCurrentPageTable(Common::PageTable* page_table) {
38 } 38 }
39} 39}
40 40
41Common::PageTable* GetCurrentPageTable() {
42 return current_page_table;
43}
44
45static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory, 41static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
46 Common::PageType type) { 42 Common::PageType type) {
47 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, 43 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
diff --git a/src/core/memory.h b/src/core/memory.h
index 1d38cdca8..6845f5fe1 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -28,16 +28,6 @@ constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
28 28
29/// Virtual user-space memory regions 29/// Virtual user-space memory regions
30enum : VAddr { 30enum : VAddr {
31 /// Read-only page containing kernel and system configuration values.
32 CONFIG_MEMORY_VADDR = 0x1FF80000,
33 CONFIG_MEMORY_SIZE = 0x00001000,
34 CONFIG_MEMORY_VADDR_END = CONFIG_MEMORY_VADDR + CONFIG_MEMORY_SIZE,
35
36 /// Usually read-only page containing mostly values read from hardware.
37 SHARED_PAGE_VADDR = 0x1FF81000,
38 SHARED_PAGE_SIZE = 0x00001000,
39 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
40
41 /// TLS (Thread-Local Storage) related. 31 /// TLS (Thread-Local Storage) related.
42 TLS_ENTRY_SIZE = 0x200, 32 TLS_ENTRY_SIZE = 0x200,
43 33
@@ -50,9 +40,8 @@ enum : VAddr {
50 KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE, 40 KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
51}; 41};
52 42
53/// Currently active page table 43/// Changes the currently active page table.
54void SetCurrentPageTable(Common::PageTable* page_table); 44void SetCurrentPageTable(Common::PageTable* page_table);
55Common::PageTable* GetCurrentPageTable();
56 45
57/// Determines if the given VAddr is valid for the specified process. 46/// Determines if the given VAddr is valid for the specified process.
58bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); 47bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 3e1a735c3..58af41f6e 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -17,7 +17,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
17 : mutable_memory(mutable_memory_), 17 : mutable_memory(mutable_memory_),
18 test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} { 18 test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
19 auto process = Kernel::Process::Create(Core::System::GetInstance(), ""); 19 auto process = Kernel::Process::Create(Core::System::GetInstance(), "");
20 kernel.MakeCurrentProcess(process.get());
21 page_table = &process->VMManager().page_table; 20 page_table = &process->VMManager().page_table;
22 21
23 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); 22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
@@ -28,7 +27,7 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
28 Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); 27 Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
29 Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); 28 Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
30 29
31 Memory::SetCurrentPageTable(page_table); 30 kernel.MakeCurrentProcess(process.get());
32} 31}
33 32
34TestEnvironment::~TestEnvironment() { 33TestEnvironment::~TestEnvironment() {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 30b29e14d..4461083ff 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
31 31
32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { 32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
33 auto& rasterizer{renderer.Rasterizer()}; 33 auto& rasterizer{renderer.Rasterizer()};
34 memory_manager = std::make_unique<Tegra::MemoryManager>(); 34 memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 8417324ff..0f4e820aa 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,16 +5,13 @@
5#include "common/alignment.h" 5#include "common/alignment.h"
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
9#include "core/memory.h" 8#include "core/memory.h"
10#include "video_core/gpu.h"
11#include "video_core/memory_manager.h" 9#include "video_core/memory_manager.h"
12#include "video_core/rasterizer_interface.h" 10#include "video_core/rasterizer_interface.h"
13#include "video_core/renderer_base.h"
14 11
15namespace Tegra { 12namespace Tegra {
16 13
17MemoryManager::MemoryManager() { 14MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {
18 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); 15 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
19 std::fill(page_table.attributes.begin(), page_table.attributes.end(), 16 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
20 Common::PageType::Unmapped); 17 Common::PageType::Unmapped);
@@ -70,8 +67,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
70 const u64 aligned_size{Common::AlignUp(size, page_size)}; 67 const u64 aligned_size{Common::AlignUp(size, page_size)};
71 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; 68 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
72 69
73 Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr, 70 rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
74 aligned_size);
75 UnmapRange(gpu_addr, aligned_size); 71 UnmapRange(gpu_addr, aligned_size);
76 72
77 return gpu_addr; 73 return gpu_addr;
@@ -204,14 +200,85 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
204} 200}
205 201
206void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const { 202void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
207 std::memcpy(dest_buffer, GetPointer(src_addr), size); 203 std::size_t remaining_size{size};
204 std::size_t page_index{src_addr >> page_bits};
205 std::size_t page_offset{src_addr & page_mask};
206
207 while (remaining_size > 0) {
208 const std::size_t copy_amount{
209 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
210
211 switch (page_table.attributes[page_index]) {
212 case Common::PageType::Memory: {
213 const u8* src_ptr{page_table.pointers[page_index] + page_offset};
214 rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
215 std::memcpy(dest_buffer, src_ptr, copy_amount);
216 break;
217 }
218 default:
219 UNREACHABLE();
220 }
221
222 page_index++;
223 page_offset = 0;
224 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
225 remaining_size -= copy_amount;
226 }
208} 227}
228
209void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { 229void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
210 std::memcpy(GetPointer(dest_addr), src_buffer, size); 230 std::size_t remaining_size{size};
231 std::size_t page_index{dest_addr >> page_bits};
232 std::size_t page_offset{dest_addr & page_mask};
233
234 while (remaining_size > 0) {
235 const std::size_t copy_amount{
236 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
237
238 switch (page_table.attributes[page_index]) {
239 case Common::PageType::Memory: {
240 u8* dest_ptr{page_table.pointers[page_index] + page_offset};
241 rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
242 std::memcpy(dest_ptr, src_buffer, copy_amount);
243 break;
244 }
245 default:
246 UNREACHABLE();
247 }
248
249 page_index++;
250 page_offset = 0;
251 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
252 remaining_size -= copy_amount;
253 }
211} 254}
212 255
213void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { 256void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
214 std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size); 257 std::size_t remaining_size{size};
258 std::size_t page_index{src_addr >> page_bits};
259 std::size_t page_offset{src_addr & page_mask};
260
261 while (remaining_size > 0) {
262 const std::size_t copy_amount{
263 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
264
265 switch (page_table.attributes[page_index]) {
266 case Common::PageType::Memory: {
267 const u8* src_ptr{page_table.pointers[page_index] + page_offset};
268 rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
269 WriteBlock(dest_addr, src_ptr, copy_amount);
270 break;
271 }
272 default:
273 UNREACHABLE();
274 }
275
276 page_index++;
277 page_offset = 0;
278 dest_addr += static_cast<VAddr>(copy_amount);
279 src_addr += static_cast<VAddr>(copy_amount);
280 remaining_size -= copy_amount;
281 }
215} 282}
216 283
217void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, 284void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
@@ -351,7 +418,7 @@ MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
351 const VirtualMemoryArea& vma{vma_handle->second}; 418 const VirtualMemoryArea& vma{vma_handle->second};
352 if (vma.type == VirtualMemoryArea::Type::Mapped) { 419 if (vma.type == VirtualMemoryArea::Type::Mapped) {
353 // Region is already allocated 420 // Region is already allocated
354 return {}; 421 return vma_handle;
355 } 422 }
356 423
357 const VAddr start_in_vma{base - vma.base}; 424 const VAddr start_in_vma{base - vma.base};
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 178e2f655..647cbf93a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -10,6 +10,10 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/page_table.h" 11#include "common/page_table.h"
12 12
13namespace VideoCore {
14class RasterizerInterface;
15}
16
13namespace Tegra { 17namespace Tegra {
14 18
15/** 19/**
@@ -43,7 +47,7 @@ struct VirtualMemoryArea {
43 47
44class MemoryManager final { 48class MemoryManager final {
45public: 49public:
46 MemoryManager(); 50 MemoryManager(VideoCore::RasterizerInterface& rasterizer);
47 51
48 GPUVAddr AllocateSpace(u64 size, u64 align); 52 GPUVAddr AllocateSpace(u64 size, u64 align);
49 GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align); 53 GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
@@ -144,6 +148,7 @@ private:
144 148
145 Common::PageTable page_table{page_bits}; 149 Common::PageTable page_table{page_bits};
146 VMAMap vma_map; 150 VMAMap vma_map;
151 VideoCore::RasterizerInterface& rasterizer;
147}; 152};
148 153
149} // namespace Tegra 154} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a1a51f226..3ea08ef7b 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -21,6 +21,8 @@
21 21
22namespace OpenGL::GLShader { 22namespace OpenGL::GLShader {
23 23
24namespace {
25
24using Tegra::Shader::Attribute; 26using Tegra::Shader::Attribute;
25using Tegra::Shader::AttributeUse; 27using Tegra::Shader::AttributeUse;
26using Tegra::Shader::Header; 28using Tegra::Shader::Header;
@@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
34using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; 36using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
35using Operation = const OperationNode&; 37using Operation = const OperationNode&;
36 38
39enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
40
41struct TextureAoffi {};
42using TextureArgument = std::pair<Type, Node>;
43using TextureIR = std::variant<TextureAoffi, TextureArgument>;
44
37enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; 45enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
38constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 46constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
39 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); 47 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
40constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = 48constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
41 static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); 49 static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
42 50
43enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
44
45class ShaderWriter { 51class ShaderWriter {
46public: 52public:
47 void AddExpression(std::string_view text) { 53 void AddExpression(std::string_view text) {
@@ -91,7 +97,7 @@ private:
91}; 97};
92 98
93/// Generates code to use for a swizzle operation. 99/// Generates code to use for a swizzle operation.
94static std::string GetSwizzle(u32 elem) { 100std::string GetSwizzle(u32 elem) {
95 ASSERT(elem <= 3); 101 ASSERT(elem <= 3);
96 std::string swizzle = "."; 102 std::string swizzle = ".";
97 swizzle += "xyzw"[elem]; 103 swizzle += "xyzw"[elem];
@@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) {
99} 105}
100 106
101/// Translate topology 107/// Translate topology
102static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { 108std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
103 switch (topology) { 109 switch (topology) {
104 case Tegra::Shader::OutputTopology::PointList: 110 case Tegra::Shader::OutputTopology::PointList:
105 return "points"; 111 return "points";
@@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
114} 120}
115 121
116/// Returns true if an object has to be treated as precise 122/// Returns true if an object has to be treated as precise
117static bool IsPrecise(Operation operand) { 123bool IsPrecise(Operation operand) {
118 const auto& meta = operand.GetMeta(); 124 const auto& meta = operand.GetMeta();
119 125
120 if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { 126 if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
@@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) {
126 return false; 132 return false;
127} 133}
128 134
129static bool IsPrecise(Node node) { 135bool IsPrecise(Node node) {
130 if (const auto operation = std::get_if<OperationNode>(node)) { 136 if (const auto operation = std::get_if<OperationNode>(node)) {
131 return IsPrecise(*operation); 137 return IsPrecise(*operation);
132 } 138 }
@@ -723,8 +729,8 @@ private:
723 result_type)); 729 result_type));
724 } 730 }
725 731
726 std::string GenerateTexture(Operation operation, const std::string& func, 732 std::string GenerateTexture(Operation operation, const std::string& function_suffix,
727 const std::vector<std::pair<Type, Node>>& extras) { 733 const std::vector<TextureIR>& extras) {
728 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; 734 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
729 735
730 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 736 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -734,11 +740,11 @@ private:
734 const bool has_array = meta->sampler.IsArray(); 740 const bool has_array = meta->sampler.IsArray();
735 const bool has_shadow = meta->sampler.IsShadow(); 741 const bool has_shadow = meta->sampler.IsShadow();
736 742
737 std::string expr = func; 743 std::string expr = "texture" + function_suffix;
738 expr += '('; 744 if (!meta->aoffi.empty()) {
739 expr += GetSampler(meta->sampler); 745 expr += "Offset";
740 expr += ", "; 746 }
741 747 expr += '(' + GetSampler(meta->sampler) + ", ";
742 expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); 748 expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
743 expr += '('; 749 expr += '(';
744 for (std::size_t i = 0; i < count; ++i) { 750 for (std::size_t i = 0; i < count; ++i) {
@@ -756,36 +762,74 @@ private:
756 } 762 }
757 expr += ')'; 763 expr += ')';
758 764
759 for (const auto& extra_pair : extras) { 765 for (const auto& variant : extras) {
760 const auto [type, operand] = extra_pair; 766 if (const auto argument = std::get_if<TextureArgument>(&variant)) {
761 if (operand == nullptr) { 767 expr += GenerateTextureArgument(*argument);
762 continue; 768 } else if (std::get_if<TextureAoffi>(&variant)) {
769 expr += GenerateTextureAoffi(meta->aoffi);
770 } else {
771 UNREACHABLE();
763 } 772 }
764 expr += ", "; 773 }
765 774
766 switch (type) { 775 return expr + ')';
767 case Type::Int: 776 }
768 if (const auto immediate = std::get_if<ImmediateNode>(operand)) { 777
769 // Inline the string as an immediate integer in GLSL (some extra arguments are 778 std::string GenerateTextureArgument(TextureArgument argument) {
770 // required to be constant) 779 const auto [type, operand] = argument;
771 expr += std::to_string(static_cast<s32>(immediate->GetValue())); 780 if (operand == nullptr) {
772 } else { 781 return {};
773 expr += "ftoi(" + Visit(operand) + ')'; 782 }
774 } 783
775 break; 784 std::string expr = ", ";
776 case Type::Float: 785 switch (type) {
777 expr += Visit(operand); 786 case Type::Int:
778 break; 787 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
779 default: { 788 // Inline the string as an immediate integer in GLSL (some extra arguments are
780 const auto type_int = static_cast<u32>(type); 789 // required to be constant)
781 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); 790 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
782 expr += '0'; 791 } else {
783 break; 792 expr += "ftoi(" + Visit(operand) + ')';
793 }
794 break;
795 case Type::Float:
796 expr += Visit(operand);
797 break;
798 default: {
799 const auto type_int = static_cast<u32>(type);
800 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
801 expr += '0';
802 break;
803 }
804 }
805 return expr;
806 }
807
808 std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
809 if (aoffi.empty()) {
810 return {};
811 }
812 constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
813 std::string expr = ", ";
814 expr += coord_constructors.at(aoffi.size() - 1);
815 expr += '(';
816
817 for (std::size_t index = 0; index < aoffi.size(); ++index) {
818 const auto operand{aoffi.at(index)};
819 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
820 // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
821 // to be constant by the standard).
822 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
823 } else {
824 expr += "ftoi(" + Visit(operand) + ')';
784 } 825 }
826 if (index + 1 < aoffi.size()) {
827 expr += ", ";
785 } 828 }
786 } 829 }
830 expr += ')';
787 831
788 return expr + ')'; 832 return expr;
789 } 833 }
790 834
791 std::string Assign(Operation operation) { 835 std::string Assign(Operation operation) {
@@ -1164,7 +1208,8 @@ private:
1164 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1208 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1165 ASSERT(meta); 1209 ASSERT(meta);
1166 1210
1167 std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); 1211 std::string expr = GenerateTexture(
1212 operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
1168 if (meta->sampler.IsShadow()) { 1213 if (meta->sampler.IsShadow()) {
1169 expr = "vec4(" + expr + ')'; 1214 expr = "vec4(" + expr + ')';
1170 } 1215 }
@@ -1175,7 +1220,8 @@ private:
1175 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1220 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1176 ASSERT(meta); 1221 ASSERT(meta);
1177 1222
1178 std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); 1223 std::string expr = GenerateTexture(
1224 operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
1179 if (meta->sampler.IsShadow()) { 1225 if (meta->sampler.IsShadow()) {
1180 expr = "vec4(" + expr + ')'; 1226 expr = "vec4(" + expr + ')';
1181 } 1227 }
@@ -1187,7 +1233,8 @@ private:
1187 ASSERT(meta); 1233 ASSERT(meta);
1188 1234
1189 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; 1235 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
1190 return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + 1236 return GenerateTexture(operation, "Gather",
1237 {TextureArgument{type, meta->component}, TextureAoffi{}}) +
1191 GetSwizzle(meta->element); 1238 GetSwizzle(meta->element);
1192 } 1239 }
1193 1240
@@ -1217,8 +1264,8 @@ private:
1217 ASSERT(meta); 1264 ASSERT(meta);
1218 1265
1219 if (meta->element < 2) { 1266 if (meta->element < 2) {
1220 return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + 1267 return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
1221 " * vec2(256))" + GetSwizzle(meta->element) + "))"; 1268 GetSwizzle(meta->element) + "))";
1222 } 1269 }
1223 return "0"; 1270 return "0";
1224 } 1271 }
@@ -1571,6 +1618,8 @@ private:
1571 ShaderWriter code; 1618 ShaderWriter code;
1572}; 1619};
1573 1620
1621} // Anonymous namespace
1622
1574std::string GetCommonDeclarations() { 1623std::string GetCommonDeclarations() {
1575 const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); 1624 const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
1576 const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); 1625 const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index d2d979997..8a43eb157 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -10,8 +10,8 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/file_util.h" 11#include "common/file_util.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/lz4_compression.h"
14#include "common/scm_rev.h" 13#include "common/scm_rev.h"
14#include "common/zstd_compression.h"
15 15
16#include "core/core.h" 16#include "core/core.h"
17#include "core/hle/kernel/process.h" 17#include "core/hle/kernel/process.h"
@@ -259,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
259 return {}; 259 return {};
260 } 260 }
261 261
262 dump.binary = Common::Compression::DecompressDataLZ4(compressed_binary, binary_length); 262 dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
263 if (dump.binary.empty()) { 263 if (dump.binary.empty()) {
264 return {}; 264 return {};
265 } 265 }
@@ -288,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
288 return {}; 288 return {};
289 } 289 }
290 290
291 const std::vector<u8> code = Common::Compression::DecompressDataLZ4(compressed_code, code_size); 291 const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
292 if (code.empty()) { 292 if (code.empty()) {
293 return {}; 293 return {};
294 } 294 }
@@ -474,8 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
474 if (!IsUsable()) 474 if (!IsUsable())
475 return; 475 return;
476 476
477 const std::vector<u8> compressed_code{Common::Compression::CompressDataLZ4HC( 477 const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
478 reinterpret_cast<const u8*>(code.data()), code.size(), 9)}; 478 reinterpret_cast<const u8*>(code.data()), code.size())};
479 if (compressed_code.empty()) { 479 if (compressed_code.empty()) {
480 LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}", 480 LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
481 unique_identifier); 481 unique_identifier);
@@ -506,7 +506,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
506 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); 506 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
507 507
508 const std::vector<u8> compressed_binary = 508 const std::vector<u8> compressed_binary =
509 Common::Compression::CompressDataLZ4HC(binary.data(), binary.size(), 9); 509 Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
510 510
511 if (compressed_binary.empty()) { 511 if (compressed_binary.empty()) {
512 LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}", 512 LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 8eef2a920..37dcfefdb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -62,7 +62,6 @@ public:
62 UpdatePipeline(); 62 UpdatePipeline();
63 state.draw.shader_program = 0; 63 state.draw.shader_program = 0;
64 state.draw.program_pipeline = pipeline.handle; 64 state.draw.program_pipeline = pipeline.handle;
65 state.geometry_shaders.enabled = (gs != 0);
66 } 65 }
67 66
68private: 67private:
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 9419326a3..52d569a1b 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -10,16 +10,62 @@
10 10
11namespace OpenGL { 11namespace OpenGL {
12 12
13OpenGLState OpenGLState::cur_state; 13using Maxwell = Tegra::Engines::Maxwell3D::Regs;
14 14
15OpenGLState OpenGLState::cur_state;
15bool OpenGLState::s_rgb_used; 16bool OpenGLState::s_rgb_used;
16 17
18namespace {
19
20template <typename T>
21bool UpdateValue(T& current_value, const T new_value) {
22 const bool changed = current_value != new_value;
23 current_value = new_value;
24 return changed;
25}
26
27template <typename T1, typename T2>
28bool UpdateTie(T1 current_value, const T2 new_value) {
29 const bool changed = current_value != new_value;
30 current_value = new_value;
31 return changed;
32}
33
34void Enable(GLenum cap, bool enable) {
35 if (enable) {
36 glEnable(cap);
37 } else {
38 glDisable(cap);
39 }
40}
41
42void Enable(GLenum cap, GLuint index, bool enable) {
43 if (enable) {
44 glEnablei(cap, index);
45 } else {
46 glDisablei(cap, index);
47 }
48}
49
50void Enable(GLenum cap, bool& current_value, bool new_value) {
51 if (UpdateValue(current_value, new_value))
52 Enable(cap, new_value);
53}
54
55void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
56 if (UpdateValue(current_value, new_value))
57 Enable(cap, index, new_value);
58}
59
60} // namespace
61
17OpenGLState::OpenGLState() { 62OpenGLState::OpenGLState() {
18 // These all match default OpenGL values 63 // These all match default OpenGL values
19 geometry_shaders.enabled = false;
20 framebuffer_srgb.enabled = false; 64 framebuffer_srgb.enabled = false;
65
21 multisample_control.alpha_to_coverage = false; 66 multisample_control.alpha_to_coverage = false;
22 multisample_control.alpha_to_one = false; 67 multisample_control.alpha_to_one = false;
68
23 cull.enabled = false; 69 cull.enabled = false;
24 cull.mode = GL_BACK; 70 cull.mode = GL_BACK;
25 cull.front_face = GL_CCW; 71 cull.front_face = GL_CCW;
@@ -30,14 +76,15 @@ OpenGLState::OpenGLState() {
30 76
31 primitive_restart.enabled = false; 77 primitive_restart.enabled = false;
32 primitive_restart.index = 0; 78 primitive_restart.index = 0;
79
33 for (auto& item : color_mask) { 80 for (auto& item : color_mask) {
34 item.red_enabled = GL_TRUE; 81 item.red_enabled = GL_TRUE;
35 item.green_enabled = GL_TRUE; 82 item.green_enabled = GL_TRUE;
36 item.blue_enabled = GL_TRUE; 83 item.blue_enabled = GL_TRUE;
37 item.alpha_enabled = GL_TRUE; 84 item.alpha_enabled = GL_TRUE;
38 } 85 }
39 stencil.test_enabled = false; 86
40 auto reset_stencil = [](auto& config) { 87 const auto ResetStencil = [](auto& config) {
41 config.test_func = GL_ALWAYS; 88 config.test_func = GL_ALWAYS;
42 config.test_ref = 0; 89 config.test_ref = 0;
43 config.test_mask = 0xFFFFFFFF; 90 config.test_mask = 0xFFFFFFFF;
@@ -46,8 +93,10 @@ OpenGLState::OpenGLState() {
46 config.action_depth_pass = GL_KEEP; 93 config.action_depth_pass = GL_KEEP;
47 config.action_stencil_fail = GL_KEEP; 94 config.action_stencil_fail = GL_KEEP;
48 }; 95 };
49 reset_stencil(stencil.front); 96 stencil.test_enabled = false;
50 reset_stencil(stencil.back); 97 ResetStencil(stencil.front);
98 ResetStencil(stencil.back);
99
51 for (auto& item : viewports) { 100 for (auto& item : viewports) {
52 item.x = 0; 101 item.x = 0;
53 item.y = 0; 102 item.y = 0;
@@ -61,6 +110,7 @@ OpenGLState::OpenGLState() {
61 item.scissor.width = 0; 110 item.scissor.width = 0;
62 item.scissor.height = 0; 111 item.scissor.height = 0;
63 } 112 }
113
64 for (auto& item : blend) { 114 for (auto& item : blend) {
65 item.enabled = true; 115 item.enabled = true;
66 item.rgb_equation = GL_FUNC_ADD; 116 item.rgb_equation = GL_FUNC_ADD;
@@ -70,11 +120,14 @@ OpenGLState::OpenGLState() {
70 item.src_a_func = GL_ONE; 120 item.src_a_func = GL_ONE;
71 item.dst_a_func = GL_ZERO; 121 item.dst_a_func = GL_ZERO;
72 } 122 }
123
73 independant_blend.enabled = false; 124 independant_blend.enabled = false;
125
74 blend_color.red = 0.0f; 126 blend_color.red = 0.0f;
75 blend_color.green = 0.0f; 127 blend_color.green = 0.0f;
76 blend_color.blue = 0.0f; 128 blend_color.blue = 0.0f;
77 blend_color.alpha = 0.0f; 129 blend_color.alpha = 0.0f;
130
78 logic_op.enabled = false; 131 logic_op.enabled = false;
79 logic_op.operation = GL_COPY; 132 logic_op.operation = GL_COPY;
80 133
@@ -91,9 +144,12 @@ OpenGLState::OpenGLState() {
91 clip_distance = {}; 144 clip_distance = {};
92 145
93 point.size = 1; 146 point.size = 1;
147
94 fragment_color_clamp.enabled = false; 148 fragment_color_clamp.enabled = false;
149
95 depth_clamp.far_plane = false; 150 depth_clamp.far_plane = false;
96 depth_clamp.near_plane = false; 151 depth_clamp.near_plane = false;
152
97 polygon_offset.fill_enable = false; 153 polygon_offset.fill_enable = false;
98 polygon_offset.line_enable = false; 154 polygon_offset.line_enable = false;
99 polygon_offset.point_enable = false; 155 polygon_offset.point_enable = false;
@@ -103,260 +159,255 @@ OpenGLState::OpenGLState() {
103} 159}
104 160
105void OpenGLState::ApplyDefaultState() { 161void OpenGLState::ApplyDefaultState() {
162 glEnable(GL_BLEND);
106 glDisable(GL_FRAMEBUFFER_SRGB); 163 glDisable(GL_FRAMEBUFFER_SRGB);
107 glDisable(GL_CULL_FACE); 164 glDisable(GL_CULL_FACE);
108 glDisable(GL_DEPTH_TEST); 165 glDisable(GL_DEPTH_TEST);
109 glDisable(GL_PRIMITIVE_RESTART); 166 glDisable(GL_PRIMITIVE_RESTART);
110 glDisable(GL_STENCIL_TEST); 167 glDisable(GL_STENCIL_TEST);
111 glEnable(GL_BLEND);
112 glDisable(GL_COLOR_LOGIC_OP); 168 glDisable(GL_COLOR_LOGIC_OP);
113 glDisable(GL_SCISSOR_TEST); 169 glDisable(GL_SCISSOR_TEST);
114} 170}
115 171
172void OpenGLState::ApplyFramebufferState() const {
173 if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
174 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
175 }
176 if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
177 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
178 }
179}
180
181void OpenGLState::ApplyVertexArrayState() const {
182 if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
183 glBindVertexArray(draw.vertex_array);
184 }
185}
186
187void OpenGLState::ApplyShaderProgram() const {
188 if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
189 glUseProgram(draw.shader_program);
190 }
191}
192
193void OpenGLState::ApplyProgramPipeline() const {
194 if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
195 glBindProgramPipeline(draw.program_pipeline);
196 }
197}
198
199void OpenGLState::ApplyClipDistances() const {
200 for (std::size_t i = 0; i < clip_distance.size(); ++i) {
201 Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
202 clip_distance[i]);
203 }
204}
205
206void OpenGLState::ApplyPointSize() const {
207 if (UpdateValue(cur_state.point.size, point.size)) {
208 glPointSize(point.size);
209 }
210}
211
212void OpenGLState::ApplyFragmentColorClamp() const {
213 if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
214 glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
215 fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
216 }
217}
218
219void OpenGLState::ApplyMultisample() const {
220 Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
221 multisample_control.alpha_to_coverage);
222 Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
223 multisample_control.alpha_to_one);
224}
225
226void OpenGLState::ApplyDepthClamp() const {
227 if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
228 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
229 return;
230 }
231 cur_state.depth_clamp = depth_clamp;
232
233 UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
234 "Unimplemented Depth Clamp Separation!");
235
236 Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
237}
238
116void OpenGLState::ApplySRgb() const { 239void OpenGLState::ApplySRgb() const {
117 if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { 240 if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
118 if (framebuffer_srgb.enabled) { 241 return;
119 // Track if sRGB is used 242 cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
120 s_rgb_used = true; 243 if (framebuffer_srgb.enabled) {
121 glEnable(GL_FRAMEBUFFER_SRGB); 244 // Track if sRGB is used
122 } else { 245 s_rgb_used = true;
123 glDisable(GL_FRAMEBUFFER_SRGB); 246 glEnable(GL_FRAMEBUFFER_SRGB);
124 } 247 } else {
248 glDisable(GL_FRAMEBUFFER_SRGB);
125 } 249 }
126} 250}
127 251
128void OpenGLState::ApplyCulling() const { 252void OpenGLState::ApplyCulling() const {
129 if (cull.enabled != cur_state.cull.enabled) { 253 Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
130 if (cull.enabled) {
131 glEnable(GL_CULL_FACE);
132 } else {
133 glDisable(GL_CULL_FACE);
134 }
135 }
136 254
137 if (cull.mode != cur_state.cull.mode) { 255 if (UpdateValue(cur_state.cull.mode, cull.mode)) {
138 glCullFace(cull.mode); 256 glCullFace(cull.mode);
139 } 257 }
140 258
141 if (cull.front_face != cur_state.cull.front_face) { 259 if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
142 glFrontFace(cull.front_face); 260 glFrontFace(cull.front_face);
143 } 261 }
144} 262}
145 263
146void OpenGLState::ApplyColorMask() const { 264void OpenGLState::ApplyColorMask() const {
147 if (independant_blend.enabled) { 265 for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
148 for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 266 const auto& updated = color_mask[i];
149 const auto& updated = color_mask[i]; 267 auto& current = cur_state.color_mask[i];
150 const auto& current = cur_state.color_mask[i];
151 if (updated.red_enabled != current.red_enabled ||
152 updated.green_enabled != current.green_enabled ||
153 updated.blue_enabled != current.blue_enabled ||
154 updated.alpha_enabled != current.alpha_enabled) {
155 glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
156 updated.blue_enabled, updated.alpha_enabled);
157 }
158 }
159 } else {
160 const auto& updated = color_mask[0];
161 const auto& current = cur_state.color_mask[0];
162 if (updated.red_enabled != current.red_enabled || 268 if (updated.red_enabled != current.red_enabled ||
163 updated.green_enabled != current.green_enabled || 269 updated.green_enabled != current.green_enabled ||
164 updated.blue_enabled != current.blue_enabled || 270 updated.blue_enabled != current.blue_enabled ||
165 updated.alpha_enabled != current.alpha_enabled) { 271 updated.alpha_enabled != current.alpha_enabled) {
166 glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled, 272 current = updated;
167 updated.alpha_enabled); 273 glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
274 updated.blue_enabled, updated.alpha_enabled);
168 } 275 }
169 } 276 }
170} 277}
171 278
172void OpenGLState::ApplyDepth() const { 279void OpenGLState::ApplyDepth() const {
173 if (depth.test_enabled != cur_state.depth.test_enabled) { 280 Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
174 if (depth.test_enabled) {
175 glEnable(GL_DEPTH_TEST);
176 } else {
177 glDisable(GL_DEPTH_TEST);
178 }
179 }
180 281
181 if (depth.test_func != cur_state.depth.test_func) { 282 if (cur_state.depth.test_func != depth.test_func) {
283 cur_state.depth.test_func = depth.test_func;
182 glDepthFunc(depth.test_func); 284 glDepthFunc(depth.test_func);
183 } 285 }
184 286
185 if (depth.write_mask != cur_state.depth.write_mask) { 287 if (cur_state.depth.write_mask != depth.write_mask) {
288 cur_state.depth.write_mask = depth.write_mask;
186 glDepthMask(depth.write_mask); 289 glDepthMask(depth.write_mask);
187 } 290 }
188} 291}
189 292
190void OpenGLState::ApplyPrimitiveRestart() const { 293void OpenGLState::ApplyPrimitiveRestart() const {
191 if (primitive_restart.enabled != cur_state.primitive_restart.enabled) { 294 Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
192 if (primitive_restart.enabled) {
193 glEnable(GL_PRIMITIVE_RESTART);
194 } else {
195 glDisable(GL_PRIMITIVE_RESTART);
196 }
197 }
198 295
199 if (primitive_restart.index != cur_state.primitive_restart.index) { 296 if (cur_state.primitive_restart.index != primitive_restart.index) {
297 cur_state.primitive_restart.index = primitive_restart.index;
200 glPrimitiveRestartIndex(primitive_restart.index); 298 glPrimitiveRestartIndex(primitive_restart.index);
201 } 299 }
202} 300}
203 301
204void OpenGLState::ApplyStencilTest() const { 302void OpenGLState::ApplyStencilTest() const {
205 if (stencil.test_enabled != cur_state.stencil.test_enabled) { 303 Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
206 if (stencil.test_enabled) { 304
207 glEnable(GL_STENCIL_TEST); 305 const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
208 } else { 306 if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
209 glDisable(GL_STENCIL_TEST); 307 current.test_mask != config.test_mask) {
210 } 308 current.test_func = config.test_func;
211 } 309 current.test_ref = config.test_ref;
212 310 current.test_mask = config.test_mask;
213 const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
214 if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
215 config.test_mask != prev_config.test_mask) {
216 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); 311 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
217 } 312 }
218 if (config.action_depth_fail != prev_config.action_depth_fail || 313 if (current.action_depth_fail != config.action_depth_fail ||
219 config.action_depth_pass != prev_config.action_depth_pass || 314 current.action_depth_pass != config.action_depth_pass ||
220 config.action_stencil_fail != prev_config.action_stencil_fail) { 315 current.action_stencil_fail != config.action_stencil_fail) {
316 current.action_depth_fail = config.action_depth_fail;
317 current.action_depth_pass = config.action_depth_pass;
318 current.action_stencil_fail = config.action_stencil_fail;
221 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, 319 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
222 config.action_depth_pass); 320 config.action_depth_pass);
223 } 321 }
224 if (config.write_mask != prev_config.write_mask) { 322 if (current.write_mask != config.write_mask) {
323 current.write_mask = config.write_mask;
225 glStencilMaskSeparate(face, config.write_mask); 324 glStencilMaskSeparate(face, config.write_mask);
226 } 325 }
227 }; 326 };
228 ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front); 327 ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
229 ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); 328 ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
230} 329}
231// Viewport does not affects glClearBuffer so emulate viewport using scissor test
232void OpenGLState::EmulateViewportWithScissor() {
233 auto& current = viewports[0];
234 if (current.scissor.enabled) {
235 const GLint left = std::max(current.x, current.scissor.x);
236 const GLint right =
237 std::max(current.x + current.width, current.scissor.x + current.scissor.width);
238 const GLint bottom = std::max(current.y, current.scissor.y);
239 const GLint top =
240 std::max(current.y + current.height, current.scissor.y + current.scissor.height);
241 current.scissor.x = std::max(left, 0);
242 current.scissor.y = std::max(bottom, 0);
243 current.scissor.width = std::max(right - left, 0);
244 current.scissor.height = std::max(top - bottom, 0);
245 } else {
246 current.scissor.enabled = true;
247 current.scissor.x = current.x;
248 current.scissor.y = current.y;
249 current.scissor.width = current.width;
250 current.scissor.height = current.height;
251 }
252}
253 330
// ApplyViewport, shown as a fused side-by-side diff: on every line the left number/text is the
// pre-change file and the right number/text is the post-change file.
//
// Pre-change (left column): when geometry_shaders.enabled is set, every viewport index is
// compared against cur_state and the indexed GL calls are issued for whatever differs
// (glViewportIndexedf, glDepthRangeIndexed, glEnablei/glDisablei, glScissorIndexed); otherwise
// only viewport 0 is compared and the non-indexed calls are used (glViewport, glDepthRange,
// glEnable/glDisable, glScissor). Nothing is written back to cur_state here.
//
// Post-change (right column): the geometry_shaders branch is gone. Every viewport index always
// goes through the indexed calls, and each changed field is copied into cur_state.viewports[i]
// immediately before the matching GL call.
// NOTE(review): Enable(GL_SCISSOR_TEST, i, ...) is a helper that is not visible in this chunk;
// presumably it compares the cached flag, updates it and toggles the capability — confirm.
254void OpenGLState::ApplyViewport() const { 331void OpenGLState::ApplyViewport() const {
255 if (geometry_shaders.enabled) { 332 for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
256 for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports); 333 const auto& updated = viewports[i];
257 i++) { 334 auto& current = cur_state.viewports[i];
258 const auto& current = cur_state.viewports[i]; 335
259 const auto& updated = viewports[i]; 336 if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
260 if (updated.x != current.x || updated.y != current.y || 337 current.height != updated.height) {
261 updated.width != current.width || updated.height != current.height) { 338 current.x = updated.x;
262 glViewportIndexedf( 339 current.y = updated.y;
263 i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), 340 current.width = updated.width;
264 static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height)); 341 current.height = updated.height;
265 } 342 glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
266 if (updated.depth_range_near != current.depth_range_near || 343 static_cast<GLfloat>(updated.width),
267 updated.depth_range_far != current.depth_range_far) { 344 static_cast<GLfloat>(updated.height));
268 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
269 }
270
271 if (updated.scissor.enabled != current.scissor.enabled) {
272 if (updated.scissor.enabled) {
273 glEnablei(GL_SCISSOR_TEST, i);
274 } else {
275 glDisablei(GL_SCISSOR_TEST, i);
276 }
277 }
278
279 if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
280 updated.scissor.width != current.scissor.width ||
281 updated.scissor.height != current.scissor.height) {
282 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
283 updated.scissor.height);
284 }
285 }
286 } else {
287 const auto& current = cur_state.viewports[0];
288 const auto& updated = viewports[0];
289 if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
290 updated.height != current.height) {
291 glViewport(updated.x, updated.y, updated.width, updated.height);
292 }
293
294 if (updated.depth_range_near != current.depth_range_near ||
295 updated.depth_range_far != current.depth_range_far) {
296 glDepthRange(updated.depth_range_near, updated.depth_range_far);
297 } 345 }
298 346 if (current.depth_range_near != updated.depth_range_near ||
299 if (updated.scissor.enabled != current.scissor.enabled) { 347 current.depth_range_far != updated.depth_range_far) {
300 if (updated.scissor.enabled) { 348 current.depth_range_near = updated.depth_range_near;
301 glEnable(GL_SCISSOR_TEST); 349 current.depth_range_far = updated.depth_range_far;
302 } else { 350 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
303 glDisable(GL_SCISSOR_TEST);
304 }
305 } 351 }
306 352
307 if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || 353 Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
308 updated.scissor.width != current.scissor.width || 354
309 updated.scissor.height != current.scissor.height) { 355 if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
310 glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width, 356 current.scissor.width != updated.scissor.width ||
311 updated.scissor.height); 357 current.scissor.height != updated.scissor.height) {
358 current.scissor.x = updated.scissor.x;
359 current.scissor.y = updated.scissor.y;
360 current.scissor.width = updated.scissor.width;
361 current.scissor.height = updated.scissor.height;
362 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
363 updated.scissor.height);
312 } 364 }
313 } 365 }
314} 366}
315 367
// ApplyGlobalBlending in fused side-by-side form (left = pre-change, right = post-change).
// Both versions work on blend[0] versus cur_state.blend[0].
//
// Pre-change: toggles GL_BLEND when the enabled flag differs, then returns early if blending is
// disabled, so the blend functions and equations are only compared while blending is enabled.
//
// Post-change: the early return is gone, so functions and equations are compared regardless of
// the enabled flag. `current` is now a mutable reference and every changed field is stored into
// cur_state.blend[0] before glBlendFuncSeparate / glBlendEquationSeparate is called.
// NOTE(review): Enable(GL_BLEND, current.enabled, updated.enabled) is defined outside this
// chunk; presumably it updates the cached flag and toggles the capability — confirm.
316void OpenGLState::ApplyGlobalBlending() const { 368void OpenGLState::ApplyGlobalBlending() const {
317 const Blend& current = cur_state.blend[0];
318 const Blend& updated = blend[0]; 369 const Blend& updated = blend[0];
319 if (updated.enabled != current.enabled) { 370 Blend& current = cur_state.blend[0];
320 if (updated.enabled) { 371
321 glEnable(GL_BLEND); 372 Enable(GL_BLEND, current.enabled, updated.enabled);
322 } else { 373
323 glDisable(GL_BLEND); 374 if (current.src_rgb_func != updated.src_rgb_func ||
324 } 375 current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
325 } 376 current.dst_a_func != updated.dst_a_func) {
326 if (!updated.enabled) { 377 current.src_rgb_func = updated.src_rgb_func;
327 return; 378 current.dst_rgb_func = updated.dst_rgb_func;
328 } 379 current.src_a_func = updated.src_a_func;
329 if (updated.src_rgb_func != current.src_rgb_func || 380 current.dst_a_func = updated.dst_a_func;
330 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
331 updated.dst_a_func != current.dst_a_func) {
332 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, 381 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
333 updated.dst_a_func); 382 updated.dst_a_func);
334 } 383 }
335 384
336 if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { 385 if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
386 current.rgb_equation = updated.rgb_equation;
387 current.a_equation = updated.a_equation;
337 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); 388 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
338 } 389 }
339} 390}
340 391
341void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { 392void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
342 const Blend& updated = blend[target]; 393 const Blend& updated = blend[target];
343 const Blend& current = cur_state.blend[target]; 394 Blend& current = cur_state.blend[target];
344 if (updated.enabled != current.enabled || force) { 395
345 if (updated.enabled) { 396 if (current.enabled != updated.enabled || force) {
346 glEnablei(GL_BLEND, static_cast<GLuint>(target)); 397 current.enabled = updated.enabled;
347 } else { 398 Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
348 glDisablei(GL_BLEND, static_cast<GLuint>(target));
349 }
350 } 399 }
351 400
352 if (updated.src_rgb_func != current.src_rgb_func || 401 if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
353 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || 402 current.dst_a_func),
354 updated.dst_a_func != current.dst_a_func) { 403 std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
404 updated.dst_a_func))) {
355 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, 405 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
356 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); 406 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
357 } 407 }
358 408
359 if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { 409 if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
410 std::tie(updated.rgb_equation, updated.a_equation))) {
360 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, 411 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
361 updated.a_equation); 412 updated.a_equation);
362 } 413 }
@@ -364,77 +415,48 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
364 415
// ApplyBlending in fused side-by-side form (left = pre-change, right = post-change).
// Both versions dispatch to ApplyTargetBlending per render target when independant_blend.enabled
// is set, and to ApplyGlobalBlending otherwise, then apply the blend colour if it changed.
//
// Post-change differences visible here:
//  - the `force` flag (independent-blend setting differs from the cached one) is computed once
//    before the loop instead of being re-evaluated in every call;
//  - cur_state.independant_blend.enabled is written back right after the dispatch;
//  - the four-way colour comparison is replaced by UpdateTie over std::tie tuples.
// NOTE(review): UpdateTie is defined outside this chunk; from its use as an `if` condition it
// presumably assigns the new values into the first tuple and reports whether anything changed —
// confirm.
365void OpenGLState::ApplyBlending() const { 416void OpenGLState::ApplyBlending() const {
366 if (independant_blend.enabled) { 417 if (independant_blend.enabled) {
367 for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 418 const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
368 ApplyTargetBlending(i, 419 for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
369 independant_blend.enabled != cur_state.independant_blend.enabled); 420 ApplyTargetBlending(target, force);
370 } 421 }
371 } else { 422 } else {
372 ApplyGlobalBlending(); 423 ApplyGlobalBlending();
373 } 424 }
374 if (blend_color.red != cur_state.blend_color.red || 425 cur_state.independant_blend.enabled = independant_blend.enabled;
375 blend_color.green != cur_state.blend_color.green || 426
376 blend_color.blue != cur_state.blend_color.blue || 427 if (UpdateTie(
377 blend_color.alpha != cur_state.blend_color.alpha) { 428 std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
429 cur_state.blend_color.blue, cur_state.blend_color.alpha),
430 std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
378 glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha); 431 glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
379 } 432 }
380} 433}
381 434
// ApplyLogicOp in fused side-by-side form (left = pre-change, right = post-change).
// Pre-change: toggles GL_COLOR_LOGIC_OP with glEnable/glDisable when the flag differs from
// cur_state, and calls glLogicOp when the operation differs.
// Post-change: the toggle is routed through Enable(...) and the operation comparison through
// UpdateValue(...), both taking the cur_state field as their first state argument.
// NOTE(review): Enable and UpdateValue are defined outside this chunk; presumably they store the
// new value into the cached field and report/act on a change — confirm.
382void OpenGLState::ApplyLogicOp() const { 435void OpenGLState::ApplyLogicOp() const {
383 if (logic_op.enabled != cur_state.logic_op.enabled) { 436 Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
384 if (logic_op.enabled) {
385 glEnable(GL_COLOR_LOGIC_OP);
386 } else {
387 glDisable(GL_COLOR_LOGIC_OP);
388 }
389 }
390 437
391 if (logic_op.operation != cur_state.logic_op.operation) { 438 if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
392 glLogicOp(logic_op.operation); 439 glLogicOp(logic_op.operation);
393 } 440 }
394} 441}
395 442
// ApplyPolygonOffset in fused side-by-side form (left = pre-change, right = post-change).
//
// Pre-change: computes six *_changed booleans against cur_state, toggles
// GL_POLYGON_OFFSET_FILL/LINE/POINT with glEnable/glDisable as needed, and re-sends the offset
// when factor, units or clamp changed.
//
// Post-change: the three toggles go through Enable(...) and the factor/units/clamp comparison
// through UpdateTie(...) on std::tie tuples. The offset call itself is unchanged in the clamp
// path (glPolygonOffsetClamp when GLAD_GL_EXT_polygon_offset_clamp is set and clamp != 0); in the
// fallback path the UNIMPLEMENTED_IF_MSG check now runs before glPolygonOffset instead of after.
// NOTE(review): Enable and UpdateTie are defined outside this chunk — confirm they also write
// the new values into cur_state.polygon_offset.
396void OpenGLState::ApplyPolygonOffset() const { 443void OpenGLState::ApplyPolygonOffset() const {
397 const bool fill_enable_changed = 444 Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
398 polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; 445 polygon_offset.fill_enable);
399 const bool line_enable_changed = 446 Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
400 polygon_offset.line_enable != cur_state.polygon_offset.line_enable; 447 polygon_offset.line_enable);
401 const bool point_enable_changed = 448 Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
402 polygon_offset.point_enable != cur_state.polygon_offset.point_enable; 449 polygon_offset.point_enable);
403 const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor; 450
404 const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units; 451 if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
405 const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp; 452 cur_state.polygon_offset.clamp),
406 453 std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
407 if (fill_enable_changed) {
408 if (polygon_offset.fill_enable) {
409 glEnable(GL_POLYGON_OFFSET_FILL);
410 } else {
411 glDisable(GL_POLYGON_OFFSET_FILL);
412 }
413 }
414
415 if (line_enable_changed) {
416 if (polygon_offset.line_enable) {
417 glEnable(GL_POLYGON_OFFSET_LINE);
418 } else {
419 glDisable(GL_POLYGON_OFFSET_LINE);
420 }
421 }
422
423 if (point_enable_changed) {
424 if (polygon_offset.point_enable) {
425 glEnable(GL_POLYGON_OFFSET_POINT);
426 } else {
427 glDisable(GL_POLYGON_OFFSET_POINT);
428 }
429 }
430
431 if (factor_changed || units_changed || clamp_changed) {
432 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { 454 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
433 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); 455 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
434 } else { 456 } else {
435 glPolygonOffset(polygon_offset.factor, polygon_offset.units);
436 UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, 457 UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
437 "Unimplemented Depth polygon offset clamp."); 458 "Unimplemented Depth polygon offset clamp.");
459 glPolygonOffset(polygon_offset.factor, polygon_offset.units);
438 } 460 }
439 } 461 }
440} 462}
@@ -443,22 +465,21 @@ void OpenGLState::ApplyTextures() const {
443 bool has_delta{}; 465 bool has_delta{};
444 std::size_t first{}; 466 std::size_t first{};
445 std::size_t last{}; 467 std::size_t last{};
446 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures; 468 std::array<GLuint, Maxwell::NumTextureSamplers> textures;
447 469
448 for (std::size_t i = 0; i < std::size(texture_units); ++i) { 470 for (std::size_t i = 0; i < std::size(texture_units); ++i) {
449 const auto& texture_unit = texture_units[i]; 471 const auto& texture_unit = texture_units[i];
450 const auto& cur_state_texture_unit = cur_state.texture_units[i]; 472 auto& cur_state_texture_unit = cur_state.texture_units[i];
451 textures[i] = texture_unit.texture; 473 textures[i] = texture_unit.texture;
452 474 if (cur_state_texture_unit.texture == textures[i])
453 if (textures[i] != cur_state_texture_unit.texture) { 475 continue;
454 if (!has_delta) { 476 cur_state_texture_unit.texture = textures[i];
455 first = i; 477 if (!has_delta) {
456 has_delta = true; 478 first = i;
457 } 479 has_delta = true;
458 last = i;
459 } 480 }
481 last = i;
460 } 482 }
461
462 if (has_delta) { 483 if (has_delta) {
463 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 484 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
464 textures.data() + first); 485 textures.data() + first);
@@ -469,16 +490,18 @@ void OpenGLState::ApplySamplers() const {
469 bool has_delta{}; 490 bool has_delta{};
470 std::size_t first{}; 491 std::size_t first{};
471 std::size_t last{}; 492 std::size_t last{};
472 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers; 493 std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
494
473 for (std::size_t i = 0; i < std::size(samplers); ++i) { 495 for (std::size_t i = 0; i < std::size(samplers); ++i) {
496 if (cur_state.texture_units[i].sampler == texture_units[i].sampler)
497 continue;
498 cur_state.texture_units[i].sampler = texture_units[i].sampler;
474 samplers[i] = texture_units[i].sampler; 499 samplers[i] = texture_units[i].sampler;
475 if (samplers[i] != cur_state.texture_units[i].sampler) { 500 if (!has_delta) {
476 if (!has_delta) { 501 first = i;
477 first = i; 502 has_delta = true;
478 has_delta = true;
479 }
480 last = i;
481 } 503 }
504 last = i;
482 } 505 }
483 if (has_delta) { 506 if (has_delta) {
484 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 507 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
@@ -486,81 +509,15 @@ void OpenGLState::ApplySamplers() const {
486 } 509 }
487} 510}
488 511
// Pre-change text only (one number per line, no right-hand column): the diff removes this
// definition from this position.
// Rebinds GL_READ_FRAMEBUFFER and GL_DRAW_FRAMEBUFFER, each only when the requested handle
// differs from the one cached in cur_state.draw. It does not update cur_state itself.
// NOTE(review): the post-change Apply() still calls ApplyFramebufferState() and the header hunk
// still declares it, so the definition presumably moves elsewhere in the file — not visible in
// this chunk.
489void OpenGLState::ApplyFramebufferState() const {
490 if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
491 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
492 }
493 if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
494 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
495 }
496}
497
// Pre-change text only (no right-hand column): the diff removes this definition from this
// position.
// Binds draw.vertex_array when it differs from the handle cached in cur_state.draw; the cache
// is not written here.
// NOTE(review): still called from the post-change Apply() and still declared in the header
// hunk, so the body presumably moves elsewhere — not visible in this chunk.
498void OpenGLState::ApplyVertexArrayState() const {
499 if (draw.vertex_array != cur_state.draw.vertex_array) {
500 glBindVertexArray(draw.vertex_array);
501 }
502}
503
// Pre-change text only (no right-hand column): the diff removes this definition from this
// position.
// Returns early when both far_plane and near_plane match the values cached in cur_state.
// Otherwise it reports an unimplemented case if the two flags differ from each other, then
// enables GL_DEPTH_CLAMP when either flag is set and disables it when neither is.
// NOTE(review): the post-change Apply() still calls ApplyDepthClamp() and the header hunk still
// declares it, so the body presumably moves elsewhere — not visible in this chunk.
504void OpenGLState::ApplyDepthClamp() const {
505 if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
506 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
507 return;
508 }
509 UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
510 "Unimplemented Depth Clamp Separation!");
511
512 if (depth_clamp.far_plane || depth_clamp.near_plane) {
513 glEnable(GL_DEPTH_CLAMP);
514 } else {
515 glDisable(GL_DEPTH_CLAMP);
516 }
517}
518
519void OpenGLState::Apply() const { 512void OpenGLState::Apply() const {
520 ApplyFramebufferState(); 513 ApplyFramebufferState();
521 ApplyVertexArrayState(); 514 ApplyVertexArrayState();
522 515 ApplyShaderProgram();
523 // Shader program 516 ApplyProgramPipeline();
524 if (draw.shader_program != cur_state.draw.shader_program) { 517 ApplyClipDistances();
525 glUseProgram(draw.shader_program); 518 ApplyPointSize();
526 } 519 ApplyFragmentColorClamp();
527 520 ApplyMultisample();
528 // Program pipeline
529 if (draw.program_pipeline != cur_state.draw.program_pipeline) {
530 glBindProgramPipeline(draw.program_pipeline);
531 }
532 // Clip distance
533 for (std::size_t i = 0; i < clip_distance.size(); ++i) {
534 if (clip_distance[i] != cur_state.clip_distance[i]) {
535 if (clip_distance[i]) {
536 glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
537 } else {
538 glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
539 }
540 }
541 }
542 // Point
543 if (point.size != cur_state.point.size) {
544 glPointSize(point.size);
545 }
546 if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) {
547 glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
548 fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
549 }
550 if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) {
551 if (multisample_control.alpha_to_coverage) {
552 glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE);
553 } else {
554 glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
555 }
556 }
557 if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) {
558 if (multisample_control.alpha_to_one) {
559 glEnable(GL_SAMPLE_ALPHA_TO_ONE);
560 } else {
561 glDisable(GL_SAMPLE_ALPHA_TO_ONE);
562 }
563 }
564 ApplyDepthClamp(); 521 ApplyDepthClamp();
565 ApplyColorMask(); 522 ApplyColorMask();
566 ApplyViewport(); 523 ApplyViewport();
@@ -574,7 +531,28 @@ void OpenGLState::Apply() const {
574 ApplyTextures(); 531 ApplyTextures();
575 ApplySamplers(); 532 ApplySamplers();
576 ApplyPolygonOffset(); 533 ApplyPolygonOffset();
577 cur_state = *this; 534}
535
// Post-change text only (one number per line): EmulateViewportWithScissor is added at this
// position. The same statements appear with pre-change numbers 235-252 at the top of this chunk,
// so the diff appears to relocate the function rather than alter it.
//
// Works on viewports[0] of this state object (not on cur_state). When the scissor is already
// enabled, left/right/bottom/top are each taken as std::max of the viewport edge and the scissor
// edge; x and y are then clamped to >= 0 and width/height to >= 0. When the scissor is disabled,
// it is enabled and set to the viewport rectangle.
// NOTE(review): `right` and `top` also use std::max. A rectangle intersection would normally
// take std::min for those two edges — confirm the intended region before relying on this.
536void OpenGLState::EmulateViewportWithScissor() {
537 auto& current = viewports[0];
538 if (current.scissor.enabled) {
539 const GLint left = std::max(current.x, current.scissor.x);
540 const GLint right =
541 std::max(current.x + current.width, current.scissor.x + current.scissor.width);
542 const GLint bottom = std::max(current.y, current.scissor.y);
543 const GLint top =
544 std::max(current.y + current.height, current.scissor.y + current.scissor.height);
545 current.scissor.x = std::max(left, 0);
546 current.scissor.y = std::max(bottom, 0);
547 current.scissor.width = std::max(right - left, 0);
548 current.scissor.height = std::max(top - bottom, 0);
549 } else {
550 current.scissor.enabled = true;
551 current.scissor.x = current.x;
552 current.scissor.y = current.y;
553 current.scissor.width = current.width;
554 current.scissor.height = current.height;
555 }
// On the next line the left brace (578) closes the pre-change Apply(), whose last statement was
// `cur_state = *this;`; the right brace (556) closes EmulateViewportWithScissor.
578} 556}
579 557
580OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { 558OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 9e1eda5b1..41418a7b8 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -54,10 +54,6 @@ public:
54 } depth_clamp; // GL_DEPTH_CLAMP 54 } depth_clamp; // GL_DEPTH_CLAMP
55 55
56 struct { 56 struct {
57 bool enabled; // viewports arrays are only supported when geometry shaders are enabled.
58 } geometry_shaders;
59
60 struct {
61 bool enabled; // GL_CULL_FACE 57 bool enabled; // GL_CULL_FACE
62 GLenum mode; // GL_CULL_FACE_MODE 58 GLenum mode; // GL_CULL_FACE_MODE
63 GLenum front_face; // GL_FRONT_FACE 59 GLenum front_face; // GL_FRONT_FACE
@@ -184,34 +180,26 @@ public:
// Unchanged context (both columns carry the same text).
// Returns a copy of the class-wide cur_state, which is declared as a private static member
// further down in this header.
184 static OpenGLState GetCurState() { 180 static OpenGLState GetCurState() {
185 return cur_state; 181 return cur_state;
186 } 182 }
183
// Unchanged context (both columns carry the same text).
// Returns the static s_rgb_used flag, which the header describes as part of a workaround for
// QT not supporting srgb output.
187 static bool GetsRGBUsed() { 184 static bool GetsRGBUsed() {
188 return s_rgb_used; 185 return s_rgb_used;
189 } 186 }
187
// Unchanged context (both columns carry the same text).
// Resets the static s_rgb_used flag to false.
190 static void ClearsRGBUsed() { 188 static void ClearsRGBUsed() {
191 s_rgb_used = false; 189 s_rgb_used = false;
192 } 190 }
191
193 /// Apply this state as the current OpenGL state 192 /// Apply this state as the current OpenGL state
194 void Apply() const; 193 void Apply() const;
195 /// Apply only the state affecting the framebuffer 194
196 void ApplyFramebufferState() const; 195 void ApplyFramebufferState() const;
197 /// Apply only the state affecting the vertex array
198 void ApplyVertexArrayState() const; 196 void ApplyVertexArrayState() const;
199 /// Set the initial OpenGL state 197 void ApplyShaderProgram() const;
200 static void ApplyDefaultState(); 198 void ApplyProgramPipeline() const;
201 /// Resets any references to the given resource 199 void ApplyClipDistances() const;
202 OpenGLState& UnbindTexture(GLuint handle); 200 void ApplyPointSize() const;
203 OpenGLState& ResetSampler(GLuint handle); 201 void ApplyFragmentColorClamp() const;
204 OpenGLState& ResetProgram(GLuint handle); 202 void ApplyMultisample() const;
205 OpenGLState& ResetPipeline(GLuint handle);
206 OpenGLState& ResetVertexArray(GLuint handle);
207 OpenGLState& ResetFramebuffer(GLuint handle);
208 void EmulateViewportWithScissor();
209
210private:
211 static OpenGLState cur_state;
212 // Workaround for sRGB problems caused by
213 // QT not supporting srgb output
214 static bool s_rgb_used;
215 void ApplySRgb() const; 203 void ApplySRgb() const;
216 void ApplyCulling() const; 204 void ApplyCulling() const;
217 void ApplyColorMask() const; 205 void ApplyColorMask() const;
@@ -227,6 +215,26 @@ private:
227 void ApplySamplers() const; 215 void ApplySamplers() const;
228 void ApplyDepthClamp() const; 216 void ApplyDepthClamp() const;
229 void ApplyPolygonOffset() const; 217 void ApplyPolygonOffset() const;
218
219 /// Set the initial OpenGL state
220 static void ApplyDefaultState();
221
222 /// Resets any references to the given resource
223 OpenGLState& UnbindTexture(GLuint handle);
224 OpenGLState& ResetSampler(GLuint handle);
225 OpenGLState& ResetProgram(GLuint handle);
226 OpenGLState& ResetPipeline(GLuint handle);
227 OpenGLState& ResetVertexArray(GLuint handle);
228 OpenGLState& ResetFramebuffer(GLuint handle);
229
230 /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
231 void EmulateViewportWithScissor();
232
233private:
234 static OpenGLState cur_state;
235
236 // Workaround for sRGB problems caused by QT not supporting srgb output
237 static bool s_rgb_used;
230}; 238};
231 239
232} // namespace OpenGL 240} // namespace OpenGL
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index a99ae19bf..a775b402b 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -7,7 +7,9 @@
7#include <fmt/format.h> 7#include <fmt/format.h>
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/bit_field.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/logging/log.h"
11#include "video_core/engines/shader_bytecode.h" 13#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h" 14#include "video_core/shader/shader_ir.h"
13 15
@@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
41 43
42 switch (opcode->get().GetId()) { 44 switch (opcode->get().GetId()) {
43 case OpCode::Id::TEX: { 45 case OpCode::Id::TEX: {
44 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
45 "AOFFI is not implemented");
46
47 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { 46 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
48 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); 47 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
49 } 48 }
50 49
51 const TextureType texture_type{instr.tex.texture_type}; 50 const TextureType texture_type{instr.tex.texture_type};
52 const bool is_array = instr.tex.array != 0; 51 const bool is_array = instr.tex.array != 0;
52 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); 53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
54 const auto process_mode = instr.tex.GetTextureProcessMode(); 54 const auto process_mode = instr.tex.GetTextureProcessMode();
55 WriteTexInstructionFloat( 55 WriteTexInstructionFloat(
56 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); 56 bb, instr,
57 GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
57 break; 58 break;
58 } 59 }
59 case OpCode::Id::TEXS: { 60 case OpCode::Id::TEXS: {
@@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
78 } 79 }
79 case OpCode::Id::TLD4: { 80 case OpCode::Id::TLD4: {
80 ASSERT(instr.tld4.array == 0); 81 ASSERT(instr.tld4.array == 0);
81 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
82 "AOFFI is not implemented");
83 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), 82 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
84 "NDV is not implemented"); 83 "NDV is not implemented");
85 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), 84 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
@@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
92 const auto texture_type = instr.tld4.texture_type.Value(); 91 const auto texture_type = instr.tld4.texture_type.Value();
93 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); 92 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
94 const bool is_array = instr.tld4.array != 0; 93 const bool is_array = instr.tld4.array != 0;
95 WriteTexInstructionFloat(bb, instr, 94 const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
96 GetTld4Code(instr, texture_type, depth_compare, is_array)); 95 WriteTexInstructionFloat(
96 bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
97 break; 97 break;
98 } 98 }
99 case OpCode::Id::TLD4S: { 99 case OpCode::Id::TLD4S: {
@@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
127 Node4 values; 127 Node4 values;
128 for (u32 element = 0; element < values.size(); ++element) { 128 for (u32 element = 0; element < values.size(); ++element) {
129 auto coords_copy = coords; 129 auto coords_copy = coords;
130 MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; 130 MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); 131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
132 } 132 }
133 133
@@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
152 if (!instr.txq.IsComponentEnabled(element)) { 152 if (!instr.txq.IsComponentEnabled(element)) {
153 continue; 153 continue;
154 } 154 }
155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; 155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
156 const Node value = 156 const Node value =
157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); 157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
158 SetTemporal(bb, indexer++, value); 158 SetTemporal(bb, indexer++, value);
@@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
202 202
203 for (u32 element = 0; element < 2; ++element) { 203 for (u32 element = 0; element < 2; ++element) {
204 auto params = coords; 204 auto params = coords;
205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; 205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); 206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
207 SetTemporal(bb, element, value); 207 SetTemporal(bb, element, value);
208 } 208 }
@@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
325 325
326Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, 326Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
327 TextureProcessMode process_mode, std::vector<Node> coords, 327 TextureProcessMode process_mode, std::vector<Node> coords,
328 Node array, Node depth_compare, u32 bias_offset) { 328 Node array, Node depth_compare, u32 bias_offset,
329 std::vector<Node> aoffi) {
329 const bool is_array = array; 330 const bool is_array = array;
330 const bool is_shadow = depth_compare; 331 const bool is_shadow = depth_compare;
331 332
@@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
374 Node4 values; 375 Node4 values;
375 for (u32 element = 0; element < values.size(); ++element) { 376 for (u32 element = 0; element < values.size(); ++element) {
376 auto copy_coords = coords; 377 auto copy_coords = coords;
377 MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; 378 MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
378 values[element] = Operation(read_method, meta, std::move(copy_coords)); 379 values[element] = Operation(read_method, meta, std::move(copy_coords));
379 } 380 }
380 381
@@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
382} 383}
383 384
384Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, 385Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
385 TextureProcessMode process_mode, bool depth_compare, bool is_array) { 386 TextureProcessMode process_mode, bool depth_compare, bool is_array,
386 const bool lod_bias_enabled = 387 bool is_aoffi) {
387 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); 388 const bool lod_bias_enabled{
389 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
390
391 u64 parameter_register = instr.gpr20.Value();
392 if (lod_bias_enabled) {
393 ++parameter_register;
394 }
388 395
389 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( 396 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
390 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); 397 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
@@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
404 411
405 const Node array = is_array ? GetRegister(array_register) : nullptr; 412 const Node array = is_array ? GetRegister(array_register) : nullptr;
406 413
414 std::vector<Node> aoffi;
415 if (is_aoffi) {
416 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
417 }
418
407 Node dc{}; 419 Node dc{};
408 if (depth_compare) { 420 if (depth_compare) {
409 // Depth is always stored in the register signaled by gpr20 or in the next register if lod 421 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
410 // or bias are used 422 // or bias are used
411 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); 423 dc = GetRegister(parameter_register++);
412 dc = GetRegister(depth_register);
413 } 424 }
414 425
415 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); 426 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
416} 427}
417 428
418Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, 429Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
448 dc = GetRegister(depth_register); 459 dc = GetRegister(depth_register);
449 } 460 }
450 461
451 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); 462 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
452} 463}
453 464
454Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, 465Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
455 bool is_array) { 466 bool is_array, bool is_aoffi) {
456 const std::size_t coord_count = GetCoordCount(texture_type); 467 const std::size_t coord_count = GetCoordCount(texture_type);
457 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); 468 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
458 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); 469 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
@@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
463 const u64 coord_register = array_register + (is_array ? 1 : 0); 474 const u64 coord_register = array_register + (is_array ? 1 : 0);
464 475
465 std::vector<Node> coords; 476 std::vector<Node> coords;
466 for (size_t i = 0; i < coord_count; ++i) 477 for (std::size_t i = 0; i < coord_count; ++i) {
467 coords.push_back(GetRegister(coord_register + i)); 478 coords.push_back(GetRegister(coord_register + i));
479 }
480
481 u64 parameter_register = instr.gpr20.Value();
482 std::vector<Node> aoffi;
483 if (is_aoffi) {
484 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
485 }
486
487 Node dc{};
488 if (depth_compare) {
489 dc = GetRegister(parameter_register++);
490 }
468 491
469 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); 492 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
470 493
471 Node4 values; 494 Node4 values;
472 for (u32 element = 0; element < values.size(); ++element) { 495 for (u32 element = 0; element < values.size(); ++element) {
473 auto coords_copy = coords; 496 auto coords_copy = coords;
474 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; 497 MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
475 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); 498 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
476 } 499 }
477 500
@@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
507 Node4 values; 530 Node4 values;
508 for (u32 element = 0; element < values.size(); ++element) { 531 for (u32 element = 0; element < values.size(); ++element) {
509 auto coords_copy = coords; 532 auto coords_copy = coords;
510 MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; 533 MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
511 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); 534 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
512 } 535 }
513 return values; 536 return values;
@@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
531 return {coord_count, total_coord_count}; 554 return {coord_count, total_coord_count};
532} 555}
533 556
534} // namespace VideoCommon::Shader \ No newline at end of file 557std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
558 bool is_tld4) {
559 const auto [coord_offsets, size, wrap_value,
560 diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
561 if (is_tld4) {
562 return {{0, 8, 16}, 6, 32, 64};
563 } else {
564 return {{0, 4, 8}, 4, 8, 16};
565 }
566 }();
567 const u32 mask = (1U << size) - 1;
568
569 std::vector<Node> aoffi;
570 aoffi.reserve(coord_count);
571
572 const auto aoffi_immediate{
573 TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
574 if (!aoffi_immediate) {
575 // Variable access, not supported on AMD.
576 LOG_WARNING(HW_GPU,
577 "AOFFI constant folding failed, some hardware might have graphical issues");
578 for (std::size_t coord = 0; coord < coord_count; ++coord) {
579 const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
580 const Node condition =
581 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
582 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
583 aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
584 }
585 return aoffi;
586 }
587
588 for (std::size_t coord = 0; coord < coord_count; ++coord) {
589 s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
590 if (value >= wrap_value) {
591 value -= diff_value;
592 }
593 aoffi.push_back(Immediate(value));
594 }
595 return aoffi;
596}
597
598} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 5bc3a3900..4888998d3 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -7,6 +7,7 @@
7#include <array> 7#include <array>
8#include <cstring> 8#include <cstring>
9#include <map> 9#include <map>
10#include <optional>
10#include <set> 11#include <set>
11#include <string> 12#include <string>
12#include <tuple> 13#include <tuple>
@@ -290,6 +291,7 @@ struct MetaTexture {
290 const Sampler& sampler; 291 const Sampler& sampler;
291 Node array{}; 292 Node array{};
292 Node depth_compare{}; 293 Node depth_compare{};
294 std::vector<Node> aoffi;
293 Node bias{}; 295 Node bias{};
294 Node lod{}; 296 Node lod{};
295 Node component{}; 297 Node component{};
@@ -741,14 +743,14 @@ private:
741 743
742 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 744 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
743 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, 745 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
744 bool is_array); 746 bool is_array, bool is_aoffi);
745 747
746 Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 748 Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
747 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, 749 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
748 bool is_array); 750 bool is_array);
749 751
750 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 752 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
751 bool depth_compare, bool is_array); 753 bool depth_compare, bool is_array, bool is_aoffi);
752 754
753 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 755 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
754 bool is_array); 756 bool is_array);
@@ -757,9 +759,11 @@ private:
757 Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, 759 Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
758 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); 760 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
759 761
762 std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
763
760 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 764 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
761 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, 765 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
762 Node array, Node depth_compare, u32 bias_offset); 766 Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
763 767
764 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, 768 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
765 u64 byte_height); 769 u64 byte_height);
@@ -773,6 +777,8 @@ private:
773 777
774 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); 778 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
775 779
780 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
781
776 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); 782 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
777 783
778 template <typename... T> 784 template <typename... T>
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 33b071747..4505667ff 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -6,6 +6,7 @@
6#include <utility> 6#include <utility>
7#include <variant> 7#include <variant>
8 8
9#include "common/common_types.h"
9#include "video_core/shader/shader_ir.h" 10#include "video_core/shader/shader_ir.h"
10 11
11namespace VideoCommon::Shader { 12namespace VideoCommon::Shader {
@@ -14,7 +15,7 @@ namespace {
14std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, 15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
15 OperationCode operation_code) { 16 OperationCode operation_code) {
16 for (; cursor >= 0; --cursor) { 17 for (; cursor >= 0; --cursor) {
17 const Node node = code[cursor]; 18 const Node node = code.at(cursor);
18 if (const auto operation = std::get_if<OperationNode>(node)) { 19 if (const auto operation = std::get_if<OperationNode>(node)) {
19 if (operation->GetCode() == operation_code) 20 if (operation->GetCode() == operation_code)
20 return {node, cursor}; 21 return {node, cursor};
@@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
64 return nullptr; 65 return nullptr;
65} 66}
66 67
68std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
69 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
70 // that it uses as operand
71 const auto [found, found_cursor] =
72 TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
73 if (!found) {
74 return {};
75 }
76 if (const auto immediate = std::get_if<ImmediateNode>(found)) {
77 return immediate->GetValue();
78 }
79 return {};
80}
81
67std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, 82std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
68 s64 cursor) { 83 s64 cursor) {
69 for (; cursor >= 0; --cursor) { 84 for (; cursor >= 0; --cursor) {