-rw-r--r--  .ci/templates/build-standard.yml                         |   2
-rw-r--r--  .ci/templates/build-testing.yml                          |   2
-rw-r--r--  .ci/templates/release.yml                                |  29
-rw-r--r--  README.md                                                |   3
-rw-r--r--  src/core/arm/unicorn/arm_unicorn.cpp                     |   9
-rw-r--r--  src/core/core.h                                          |   4
-rw-r--r--  src/core/hle/kernel/process.cpp                          |  29
-rw-r--r--  src/core/hle/kernel/process.h                            |  11
-rw-r--r--  src/core/hle/kernel/svc.cpp                              |  12
-rw-r--r--  src/video_core/dma_pusher.cpp                            |   2
-rw-r--r--  src/video_core/engines/kepler_compute.cpp                |   2
-rw-r--r--  src/video_core/engines/kepler_memory.cpp                 |   2
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp                    | 261
-rw-r--r--  src/video_core/engines/maxwell_3d.h                      |  89
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp                   |   2
-rw-r--r--  src/video_core/engines/shader_bytecode.h                 |  26
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp         | 178
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h           |   6
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp       |   2
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp  |  16
-rw-r--r--  src/video_core/renderer_opengl/gl_state.cpp              |  41
-rw-r--r--  src/video_core/renderer_opengl/gl_state.h                |  33
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp      |   6
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp       |   7
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp  |  17
-rw-r--r--  src/video_core/shader/decode.cpp                         |   8
-rw-r--r--  src/video_core/shader/decode/half_set_predicate.cpp      |  69
-rw-r--r--  src/video_core/shader/decode/image.cpp                   |   6
-rw-r--r--  src/video_core/shader/decode/memory.cpp                  |  37
-rw-r--r--  src/video_core/shader/decode/other.cpp                   |   2
-rw-r--r--  src/video_core/shader/decode/texture.cpp                 |  34
-rw-r--r--  src/video_core/shader/decode/xmad.cpp                    |  12
-rw-r--r--  src/video_core/shader/node.h                             |   3
-rw-r--r--  src/video_core/shader/node_helper.cpp                    |   2
-rw-r--r--  src/video_core/shader/shader_ir.cpp                      | 118
-rw-r--r--  src/video_core/shader/shader_ir.h                        |  13
-rw-r--r--  src/video_core/shader/track.cpp                          |  35
-rw-r--r--  src/video_core/texture_cache/texture_cache.h             |  13
38 files changed, 778 insertions(+), 365 deletions(-)
diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml
index 9975f5c49..6cd209dbf 100644
--- a/.ci/templates/build-standard.yml
+++ b/.ci/templates/build-standard.yml
@@ -3,7 +3,7 @@ jobs:
   displayName: 'standard'
   pool:
     vmImage: ubuntu-latest
   strategy:
     maxParallel: 10
     matrix:
       windows:
diff --git a/.ci/templates/build-testing.yml b/.ci/templates/build-testing.yml
index 101e52996..278efb6f5 100644
--- a/.ci/templates/build-testing.yml
+++ b/.ci/templates/build-testing.yml
@@ -3,7 +3,7 @@ jobs:
   displayName: 'testing'
   pool:
     vmImage: ubuntu-latest
   strategy:
     maxParallel: 10
     matrix:
       windows:
diff --git a/.ci/templates/release.yml b/.ci/templates/release.yml
deleted file mode 100644
index 60bebd2aa..000000000
--- a/.ci/templates/release.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-steps:
-  - task: DownloadPipelineArtifact@2
-    displayName: 'Download Windows Release'
-    inputs:
-      artifactName: 'yuzu-$(BuildName)-windows-mingw'
-      buildType: 'current'
-      targetPath: '$(Build.ArtifactStagingDirectory)'
-  - task: DownloadPipelineArtifact@2
-    displayName: 'Download Linux Release'
-    inputs:
-      artifactName: 'yuzu-$(BuildName)-linux'
-      buildType: 'current'
-      targetPath: '$(Build.ArtifactStagingDirectory)'
-  - task: DownloadPipelineArtifact@2
-    displayName: 'Download Release Point'
-    inputs:
-      artifactName: 'yuzu-$(BuildName)-release-point'
-      buildType: 'current'
-      targetPath: '$(Build.ArtifactStagingDirectory)'
-  - script: echo '##vso[task.setvariable variable=tagcommit]' && cat $(Build.ArtifactStagingDirectory)/tag-commit.sha
-    displayName: 'Calculate Release Point'
-  - task: GitHubRelease@0
-    inputs:
-      gitHubConnection: $(GitHubReleaseConnectionName)
-      repositoryName: '$(GitHubReleaseRepoName)'
-      action: 'create'
-      target: $(variables.tagcommit)
-      title: 'yuzu $(BuildName) #$(Build.BuildId)'
-      assets: '$(Build.ArtifactStagingDirectory)/*'
diff --git a/README.md b/README.md
index 4b1ea7d7c..32b889063 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,8 @@
 yuzu emulator
 =============
 [![Travis CI Build Status](https://travis-ci.org/yuzu-emu/yuzu.svg?branch=master)](https://travis-ci.org/yuzu-emu/yuzu)
 [![AppVeyor CI Build Status](https://ci.appveyor.com/api/projects/status/77k97svb2usreu68?svg=true)](https://ci.appveyor.com/project/bunnei/yuzu)
+[![Azure Mainline CI Build Status](https://dev.azure.com/yuzu-emu/yuzu/_apis/build/status/yuzu%20mainline?branchName=master)](https://dev.azure.com/yuzu-emu/yuzu/)
 
 yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/).
 
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index b0ee7821a..97d5c2a8a 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_
 
 static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
                                void* user_data) {
+    auto* const system = static_cast<System*>(user_data);
+
     ARM_Interface::ThreadContext ctx{};
-    Core::CurrentArmInterface().SaveContext(ctx);
+    system->CurrentArmInterface().SaveContext(ctx);
     ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
                ctx.pc, ctx.cpu_registers[30]);
-    return {};
+
+    return false;
 }
 
 ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
@@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
 
     uc_hook hook{};
     CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1));
-    CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1));
+    CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1));
     if (GDBStub::IsServerEnabled()) {
         CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1));
         last_bkpt_hit = false;
diff --git a/src/core/core.h b/src/core/core.h
index 11e73278e..8ebb385ac 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -327,10 +327,6 @@ private:
     static System s_instance;
 };
 
-inline ARM_Interface& CurrentArmInterface() {
-    return System::GetInstance().CurrentArmInterface();
-}
-
 inline Kernel::Process* CurrentProcess() {
     return System::GetInstance().CurrentProcess();
 }
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index db3ab14ce..92169a97b 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -184,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
 }
 
 void Process::Run(s32 main_thread_priority, u64 stack_size) {
-    // The kernel always ensures that the given stack size is page aligned.
-    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
-
-    // Allocate and map the main thread stack
-    // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
-    // of the user address space.
-    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
-    vm_manager
-        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
-                        0, main_thread_stack_size, MemoryState::Stack)
-        .Unwrap();
+    AllocateMainThreadStack(stack_size);
+    tls_region_address = CreateTLSRegion();
 
     vm_manager.LogLayout();
+
     ChangeStatus(ProcessStatus::Running);
 
     SetupMainThread(*this, kernel, main_thread_priority);
@@ -226,6 +218,9 @@ void Process::PrepareForTermination() {
     stop_threads(system.Scheduler(2).GetThreadList());
     stop_threads(system.Scheduler(3).GetThreadList());
 
+    FreeTLSRegion(tls_region_address);
+    tls_region_address = 0;
+
     ChangeStatus(ProcessStatus::Exited);
 }
 
@@ -325,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) {
     WakeupAllWaitingThreads();
 }
 
+void Process::AllocateMainThreadStack(u64 stack_size) {
+    // The kernel always ensures that the given stack size is page aligned.
+    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
+
+    // Allocate and map the main thread stack
+    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
+    vm_manager
+        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
+                        0, main_thread_stack_size, MemoryState::Stack)
+        .Unwrap();
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 3196014da..c2df451f3 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -135,6 +135,11 @@ public:
         return mutex;
     }
 
+    /// Gets the address to the process' dedicated TLS region.
+    VAddr GetTLSRegionAddress() const {
+        return tls_region_address;
+    }
+
     /// Gets the current status of the process
     ProcessStatus GetStatus() const {
         return status;
@@ -296,6 +301,9 @@ private:
     /// a process signal.
     void ChangeStatus(ProcessStatus new_status);
 
+    /// Allocates the main thread stack for the process, given the stack size in bytes.
+    void AllocateMainThreadStack(u64 stack_size);
+
     /// Memory manager for this process.
     Kernel::VMManager vm_manager;
 
@@ -358,6 +366,9 @@ private:
     /// variable related facilities.
     Mutex mutex;
 
+    /// Address indicating the location of the process' dedicated TLS region.
+    VAddr tls_region_address = 0;
+
     /// Random values for svcGetInfo RandomEntropy
     std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{};
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index a46eed3da..1fd1a732a 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -843,9 +843,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         return RESULT_SUCCESS;
 
     case GetInfoType::UserExceptionContextAddr:
-        LOG_WARNING(Kernel_SVC,
-                    "(STUBBED) Attempted to query user exception context address, returned 0");
-        *result = 0;
+        *result = process->GetTLSRegionAddress();
         return RESULT_SUCCESS;
 
     case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
@@ -1739,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
 // Wait for an address (via Address Arbiter)
 static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
                                  s64 timeout) {
-    LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
-                address, type, value, timeout);
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
+              type, value, timeout);
 
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Memory::IsKernelVirtualAddress(address)) {
@@ -1762,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
 // Signals to an address (via Address Arbiter)
 static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
                                   s32 num_to_wake) {
-    LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
-                address, type, value, num_to_wake);
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
+              address, type, value, num_to_wake);
 
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Memory::IsKernelVirtualAddress(address)) {
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 3175579cc..bd036cbe8 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
     MICROPROFILE_SCOPE(DispatchCalls);
 
     // On entering GPU code, assume all memory may be touched by the ARM core.
-    gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
+    gpu.Maxwell3D().dirty.OnMemoryWrite();
 
     dma_pushbuffer_subindex = 0;
 
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 089465a71..08586d33c 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
         const bool is_last_call = method_call.IsLastCall();
         upload_state.ProcessData(method_call.argument, is_last_call);
         if (is_last_call) {
-            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+            system.GPU().Maxwell3D().dirty.OnMemoryWrite();
         }
         break;
     }
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 0561f676c..44279de00 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
         const bool is_last_call = method_call.IsLastCall();
         upload_state.ProcessData(method_call.argument, is_last_call);
         if (is_last_call) {
-            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+            system.GPU().Maxwell3D().dirty.OnMemoryWrite();
         }
         break;
     }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 8755b8af4..fe9fc0278 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
                      MemoryManager& memory_manager)
     : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
       macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
+    InitDirtySettings();
     InitializeRegisterDefaults();
 }
 
@@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
     regs.stencil_back_func_mask = 0xFFFFFFFF;
     regs.stencil_back_mask = 0xFFFFFFFF;
 
+    regs.depth_test_func = Regs::ComparisonOp::Always;
+    regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
+    regs.cull.cull_face = Regs::Cull::CullFace::Back;
+
     // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
     // register carrying a default value. Assume it's OpenGL's default (1).
     regs.point_size = 1.0f;
@@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() {
     regs.rt_separate_frag_data = 1;
 }
 
+#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
+
+void Maxwell3D::InitDirtySettings() {
+    const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
+        const auto start_itr = dirty_pointers.begin() + start;
+        const auto end_itr = start_itr + range;
+        std::fill(start_itr, end_itr, position);
+    };
+    dirty.regs.fill(true);
+
+    // Init Render Targets
+    constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
+    constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
+    constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
+    u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
+    for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
+        set_block(rt_reg, registers_per_rt, rt_dirty_reg);
+        rt_dirty_reg++;
+    }
+    constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
+    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
+    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
+    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
+    constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
+    constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
+    set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
+
+    // Init Vertex Arrays
+    constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
+    constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
+    constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
+    u32 va_reg = DIRTY_REGS_POS(vertex_array);
+    u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
+    for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
+         vertex_reg += vertex_array_size) {
+        set_block(vertex_reg, 3, va_reg);
+        // The divisor concerns vertex array instances
+        dirty_pointers[vertex_reg + 3] = vi_reg;
+        va_reg++;
+        vi_reg++;
+    }
+    constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
+    constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
+    constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
+    va_reg = DIRTY_REGS_POS(vertex_array);
+    for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
+         vertex_reg += vertex_limit_size) {
+        set_block(vertex_reg, vertex_limit_size, va_reg);
+        va_reg++;
+    }
+    constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
+    constexpr u32 vertex_instance_size =
+        sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
+    constexpr u32 vertex_instance_end =
+        vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
+    vi_reg = DIRTY_REGS_POS(vertex_instance);
+    for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
+         vertex_reg += vertex_instance_size) {
+        set_block(vertex_reg, vertex_instance_size, vi_reg);
+        vi_reg++;
+    }
+    set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
+              DIRTY_REGS_POS(vertex_attrib_format));
+
+    // Init Shaders
+    constexpr u32 shader_registers_count =
+        sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
+    set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
+              DIRTY_REGS_POS(shaders));
+
+    // State
+
+    // Viewport
+    constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
+    constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
+    constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
+    set_block(viewport_start, viewport_size, viewport_dirty_reg);
+    constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
+    constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
+    set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
+
+    // Viewport transformation
+    constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
+    constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
+    set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
+
+    // Cullmode
+    constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
+    constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
+    set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
+
+    // Screen y control
+    dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
+
+    // Primitive Restart
+    constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
+    constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
+    set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
+
+    // Depth Test
+    constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
+    dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
+
+    // Stencil Test
+    constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
+
+    // Color Mask
+    constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
+    dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
+    set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
+              color_mask_dirty_reg);
+    // Blend State
+    constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
+    set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
+              blend_state_dirty_reg);
+    dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
+    set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
+    set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
+              blend_state_dirty_reg);
+
+    // Scissor State
+    constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
+    set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
+              scissor_test_dirty_reg);
+
+    // Polygon Offset
+    constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
+}
+
 void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
     // Reset the current macro.
     executing_macro = 0;
@@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
 
     const u32 method = method_call.method;
 
+    if (method == cb_data_state.current) {
+        regs.reg_array[method] = method_call.argument;
+        ProcessCBData(method_call.argument);
+        return;
+    } else if (cb_data_state.current != null_cb_data) {
+        FinishCBData();
+    }
+
     // It is an error to write to a register other than the current macro's ARG register before it
     // has finished execution.
     if (executing_macro != 0) {
@@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
 
     if (regs.reg_array[method] != method_call.argument) {
         regs.reg_array[method] = method_call.argument;
-        // Color buffers
-        constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
-        constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
-        if (method >= first_rt_reg &&
-            method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
-            const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
-            dirty_flags.color_buffer.set(rt_index);
-        }
-
-        // Zeta buffer
-        constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
-        if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
-            method == MAXWELL3D_REG_INDEX(zeta_width) ||
-            method == MAXWELL3D_REG_INDEX(zeta_height) ||
-            (method >= MAXWELL3D_REG_INDEX(zeta) &&
-             method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
-            dirty_flags.zeta_buffer = true;
-        }
-
-        // Shader
-        constexpr u32 shader_registers_count =
-            sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
-        if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
-            method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
-            dirty_flags.shaders = true;
-        }
-
-        // Vertex format
-        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
-            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
-            dirty_flags.vertex_attrib_format = true;
-        }
-
-        // Vertex buffer
-        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
-            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
-        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
-            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
-        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
-            dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
+        const std::size_t dirty_reg = dirty_pointers[method];
+        if (dirty_reg) {
+            dirty.regs[dirty_reg] = true;
+            if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
+                dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
+                dirty.vertex_array_buffers = true;
+            } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
+                       dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
+                dirty.vertex_instances = true;
+            } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
+                       dirty_reg < DIRTY_REGS_POS(render_settings)) {
+                dirty.render_settings = true;
+            }
         }
     }
 
@@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
-        ProcessCBData(method_call.argument);
+        StartCBData(method);
         break;
     }
     case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
@@ -261,7 +397,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         const bool is_last_call = method_call.IsLastCall();
         upload_state.ProcessData(method_call.argument, is_last_call);
         if (is_last_call) {
-            dirty_flags.OnMemoryWrite();
+            dirty.OnMemoryWrite();
         }
         break;
     }
@@ -333,7 +469,6 @@ void Maxwell3D::ProcessQueryGet() {
             query_result.timestamp = system.CoreTiming().GetTicks();
             memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
         }
-        dirty_flags.OnMemoryWrite();
         break;
     }
     default:
@@ -405,23 +540,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
 }
 
 void Maxwell3D::ProcessCBData(u32 value) {
+    const u32 id = cb_data_state.id;
+    cb_data_state.buffer[id][cb_data_state.counter] = value;
+    // Increment the current buffer position.
+    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
+    cb_data_state.counter++;
+}
+
+void Maxwell3D::StartCBData(u32 method) {
+    constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
+    cb_data_state.start_pos = regs.const_buffer.cb_pos;
+    cb_data_state.id = method - first_cb_data;
+    cb_data_state.current = method;
+    cb_data_state.counter = 0;
+    ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
+}
+
+void Maxwell3D::FinishCBData() {
     // Write the input value to the current const buffer at the current position.
     const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
     ASSERT(buffer_address != 0);
 
     // Don't allow writing past the end of the buffer.
-    ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
+    ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
 
-    const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+    const GPUVAddr address{buffer_address + cb_data_state.start_pos};
+    const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
 
-    u8* ptr{memory_manager.GetPointer(address)};
-    rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
-    memory_manager.Write<u32>(address, value);
+    const u32 id = cb_data_state.id;
+    memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
+    dirty.OnMemoryWrite();
 
-    dirty_flags.OnMemoryWrite();
-
-    // Increment the current buffer position.
-    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
+    cb_data_state.id = null_cb_data;
+    cb_data_state.current = null_cb_data;
 }
 
 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 8d15c8a48..ac300bf76 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1124,23 +1124,77 @@ public:
 
     State state{};
 
-    struct DirtyFlags {
-        std::bitset<8> color_buffer{0xFF};
-        std::bitset<32> vertex_array{0xFFFFFFFF};
+    struct DirtyRegs {
+        static constexpr std::size_t NUM_REGS = 256;
+        union {
+            struct {
+                bool null_dirty;
+
+                // Vertex Attributes
+                bool vertex_attrib_format;
+
+                // Vertex Arrays
+                std::array<bool, 32> vertex_array;
+
+                bool vertex_array_buffers;
+
+                // Vertex Instances
+                std::array<bool, 32> vertex_instance;
+
+                bool vertex_instances;
+
+                // Render Targets
+                std::array<bool, 8> render_target;
+                bool depth_buffer;
+
+                bool render_settings;
+
+                // Shaders
+                bool shaders;
+
+                // Rasterizer State
+                bool viewport;
+                bool clip_coefficient;
+                bool cull_mode;
+                bool primitive_restart;
+                bool depth_test;
+                bool stencil_test;
+                bool blend_state;
+                bool scissor_test;
+                bool transform_feedback;
+                bool color_mask;
+                bool polygon_offset;
 
-        bool vertex_attrib_format = true;
-        bool zeta_buffer = true;
-        bool shaders = true;
+                // Complementary
+                bool viewport_transform;
+                bool screen_y_control;
+
+                bool memory_general;
+            };
+            std::array<bool, NUM_REGS> regs;
+        };
+
+        void ResetVertexArrays() {
+            vertex_array.fill(true);
+            vertex_array_buffers = true;
+        }
+
+        void ResetRenderTargets() {
+            depth_buffer = true;
+            render_target.fill(true);
+            render_settings = true;
+        }
 
         void OnMemoryWrite() {
-            zeta_buffer = true;
             shaders = true;
-            color_buffer.set();
-            vertex_array.set();
+            memory_general = true;
+            ResetRenderTargets();
+            ResetVertexArrays();
         }
-    };
 
-    DirtyFlags dirty_flags;
+    } dirty{};
+
+    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
 
     /// Reads a register value located at the input method address
     u32 GetRegisterValue(u32 method) const;
@@ -1192,6 +1246,15 @@ private:
     /// Interpreter for the macro codes uploaded to the GPU.
     MacroInterpreter macro_interpreter;
 
+    static constexpr u32 null_cb_data = 0xFFFFFFFF;
+    struct {
+        std::array<std::array<u32, 0x4000>, 16> buffer;
+        u32 current{null_cb_data};
+        u32 id{null_cb_data};
+        u32 start_pos{};
+        u32 counter{};
+    } cb_data_state;
+
     Upload::State upload_state;
 
     /// Retrieves information about a specific TIC entry from the TIC buffer.
@@ -1200,6 +1263,8 @@ private:
     /// Retrieves information about a specific TSC entry from the TSC buffer.
     Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
 
+    void InitDirtySettings();
+
     /**
      * Call a macro on this engine.
      * @param method Method to call
@@ -1223,7 +1288,9 @@ private:
     void ProcessSyncPoint();
 
     /// Handles a write to the CB_DATA[i] register.
+    void StartCBData(u32 method);
     void ProcessCBData(u32 value);
+    void FinishCBData();
 
     /// Handles a write to the CB_BIND register.
     void ProcessCBBind(Regs::ShaderStage stage);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index afb9578d0..b5f57e534 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() {
     }
 
     // All copies here update the main memory, so mark all rasterizer states as invalid.
-    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+    system.GPU().Maxwell3D().dirty.OnMemoryWrite();
 
     if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
         // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 79d469b88..8520a0143 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -931,8 +931,6 @@ union Instruction {
     } csetp;
 
     union {
-        BitField<35, 4, PredCondition> cond;
-        BitField<49, 1, u64> h_and;
         BitField<6, 1, u64> ftz;
         BitField<45, 2, PredOperation> op;
         BitField<3, 3, u64> pred3;
@@ -940,9 +938,21 @@ union Instruction {
         BitField<43, 1, u64> negate_a;
         BitField<44, 1, u64> abs_a;
         BitField<47, 2, HalfType> type_a;
-        BitField<31, 1, u64> negate_b;
-        BitField<30, 1, u64> abs_b;
-        BitField<28, 2, HalfType> type_b;
+        union {
+            BitField<35, 4, PredCondition> cond;
+            BitField<49, 1, u64> h_and;
+            BitField<31, 1, u64> negate_b;
+            BitField<30, 1, u64> abs_b;
+            BitField<28, 2, HalfType> type_b;
+        } reg;
+        union {
+            BitField<56, 1, u64> negate_b;
+            BitField<54, 1, u64> abs_b;
+        } cbuf;
+        union {
+            BitField<49, 4, PredCondition> cond;
+            BitField<53, 1, u64> h_and;
+        } cbuf_and_imm;
         BitField<42, 1, u64> neg_pred;
         BitField<39, 3, u64> pred39;
     } hsetp2;
@@ -1548,7 +1558,9 @@ public:
         HFMA2_RC,
         HFMA2_RR,
         HFMA2_IMM_R,
+        HSETP2_C,
         HSETP2_R,
+        HSETP2_IMM,
         HSET2_R,
         POPC_C,
         POPC_R,
@@ -1831,7 +1843,9 @@ private:
             INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
             INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
             INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
-            INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"),
+            INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
+            INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
+            INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
             INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9cd4cf7b8..c59e687b6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -107,6 +107,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
     shader_program_manager = std::make_unique<GLShader::ProgramManager>();
     state.draw.shader_program = 0;
     state.Apply();
+    clear_framebuffer.Create();
 
     LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
     CheckExtensions();
@@ -126,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
     auto& gpu = system.GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
-    if (!gpu.dirty_flags.vertex_attrib_format) {
+    if (!gpu.dirty.vertex_attrib_format) {
         return state.draw.vertex_array;
     }
-    gpu.dirty_flags.vertex_attrib_format = false;
+    gpu.dirty.vertex_attrib_format = false;
 
     MICROPROFILE_SCOPE(OpenGL_VAO);
 
@@ -183,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
     }
 
     // Rebinding the VAO invalidates the vertex buffer bindings.
-    gpu.dirty_flags.vertex_array.set();
+    gpu.dirty.ResetVertexArrays();
 
     state.draw.vertex_array = vao_entry.handle;
     return vao_entry.handle;
@@ -191,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
 
 void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
     auto& gpu = system.GPU().Maxwell3D();
-    const auto& regs = gpu.regs;
-
-    if (gpu.dirty_flags.vertex_array.none())
+    if (!gpu.dirty.vertex_array_buffers)
         return;
+    gpu.dirty.vertex_array_buffers = false;
+
+    const auto& regs = gpu.regs;
 
     MICROPROFILE_SCOPE(OpenGL_VB);
 
     // Upload all guest vertex arrays sequentially to our buffer
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (!gpu.dirty_flags.vertex_array[index])
+        if (!gpu.dirty.vertex_array[index])
             continue;
+        gpu.dirty.vertex_array[index] = false;
+        gpu.dirty.vertex_instance[index] = false;
 
         const auto& vertex_array = regs.vertex_array[index];
         if (!vertex_array.IsEnabled())
@@ -226,8 +230,32 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
             glVertexArrayBindingDivisor(vao, index, 0);
         }
     }
+}
+
+void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
+    auto& gpu = system.GPU().Maxwell3D();
+
+    if (!gpu.dirty.vertex_instances)
+        return;
+    gpu.dirty.vertex_instances = false;
 
-    gpu.dirty_flags.vertex_array.reset();
+    const auto& regs = gpu.regs;
+    // Upload all guest vertex arrays sequentially to our buffer
+    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+        if (!gpu.dirty.vertex_instance[index])
+            continue;
+
+        gpu.dirty.vertex_instance[index] = false;
+
+        if (regs.instanced_arrays.IsInstancingEnabled(index) &&
+            regs.vertex_array[index].divisor != 0) {
+            // Enable vertex buffer instancing with the specified divisor.
+            glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
+        } else {
+            // Disable the vertex buffer instancing.
+            glVertexArrayBindingDivisor(vao, index, 0);
+        }
+    }
 }
 
 GLintptr RasterizerOpenGL::SetupIndexBuffer() {
@@ -343,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
 
     SyncClipEnabled(clip_distances);
 
-    gpu.dirty_flags.shaders = false;
+    gpu.dirty.shaders = false;
 }
 
 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -426,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
 
     const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
                                                  single_color_target};
-    if (fb_config_state == current_framebuffer_config_state &&
-        gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
+    if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
         // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
         // single color targets). This is done because the guest registers may not change but the
         // host framebuffer may contain different attachments
         return current_depth_stencil_usage;
     }
+    gpu.dirty.render_settings = false;
     current_framebuffer_config_state = fb_config_state;
 
     texture_cache.GuardRenderTargets(true);
@@ -521,13 +549,65 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
     return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
 }
 
+void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
+                                                 bool using_depth_fb, bool using_stencil_fb) {
+    auto& gpu = system.GPU().Maxwell3D();
+    const auto& regs = gpu.regs;
+
+    texture_cache.GuardRenderTargets(true);
+    View color_surface{};
+    if (using_color_fb) {
+        color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
+    }
+    View depth_surface{};
+    if (using_depth_fb || using_stencil_fb) {
+        depth_surface = texture_cache.GetDepthBufferSurface(false);
+    }
+    texture_cache.GuardRenderTargets(false);
+
+    current_state.draw.draw_framebuffer = clear_framebuffer.handle;
+    current_state.ApplyFramebufferState();
+
+    if (color_surface) {
+        color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
+    } else {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+    }
+
+    if (depth_surface) {
+        const auto& params = depth_surface->GetSurfaceParams();
+        switch (params.type) {
+        case VideoCore::Surface::SurfaceType::Depth: {
+            depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+            break;
+        }
+        case VideoCore::Surface::SurfaceType::DepthStencil: {
+            depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+            break;
+        }
+        default: { UNIMPLEMENTED(); }
+        }
+    } else {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+    }
+}
+
 void RasterizerOpenGL::Clear() {
     const auto& regs = system.GPU().Maxwell3D().regs;
     bool use_color{};
     bool use_depth{};
     bool use_stencil{};
 
-    OpenGLState clear_state;
+    OpenGLState prev_state{OpenGLState::GetCurState()};
+    SCOPE_EXIT({
+        prev_state.AllDirty();
+        prev_state.Apply();
+    });
+
+    OpenGLState clear_state{OpenGLState::GetCurState()};
+    clear_state.SetDefaultViewports();
     if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
         regs.clear_buffers.A) {
         use_color = true;
@@ -547,6 +627,7 @@ void RasterizerOpenGL::Clear() {
         // true.
         clear_state.depth.test_enabled = true;
         clear_state.depth.test_func = GL_ALWAYS;
+        clear_state.depth.write_mask = GL_TRUE;
     }
     if (regs.clear_buffers.S) {
         ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
@@ -583,8 +664,9 @@ void RasterizerOpenGL::Clear() {
         return;
     }
 
-    const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
-        clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
+    ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
+
+    SyncViewport(clear_state);
     if (regs.clear_flags.scissor) {
         SyncScissorTest(clear_state);
     }
@@ -593,21 +675,18 @@ void RasterizerOpenGL::Clear() {
         clear_state.EmulateViewportWithScissor();
     }
 
-    clear_state.ApplyColorMask();
-    clear_state.ApplyDepth();
-    clear_state.ApplyStencilTest();
-    clear_state.ApplyViewport();
-    clear_state.ApplyFramebufferState();
+    clear_state.AllDirty();
+    clear_state.Apply();
 
     if (use_color) {
-        glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
+        glClearBufferfv(GL_COLOR, 0, regs.clear_color);
     }
 
-    if (clear_depth && clear_stencil) {
+    if (use_depth && use_stencil) {
         glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
-    } else if (clear_depth) {
+    } else if (use_depth) {
         glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
-    } else if (clear_stencil) {
+    } else if (use_stencil) {
         glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
     }
 }
@@ -663,6 +742,7 @@ void RasterizerOpenGL::DrawArrays() {
 
     // Upload vertex and index data.
     SetupVertexBuffer(vao);
+    SetupVertexInstances(vao);
     const GLintptr index_buffer_offset = SetupIndexBuffer();
 
     // Setup draw parameters. It will automatically choose what glDraw* method to use.
@@ -689,7 +769,7 @@ void RasterizerOpenGL::DrawArrays() {
 
     if (invalidate) {
         // As all cached buffers are invalidated, we need to recheck their state.
-        gpu.dirty_flags.vertex_array.set();
+        gpu.dirty.ResetVertexArrays();
     }
 
     shader_program_manager->ApplyTo(state);
@@ -702,6 +782,7 @@ void RasterizerOpenGL::DrawArrays() {
     params.DispatchDraw();
 
     accelerate_draw = AccelDraw::Disabled;
+    gpu.dirty.memory_general = false;
 }
 
 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -976,10 +1057,11 @@ void RasterizerOpenGL::SyncClipCoef() {
 }
 
 void RasterizerOpenGL::SyncCullMode() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& maxwell3d = system.GPU().Maxwell3D();
 
-    state.cull.enabled = regs.cull.enabled != 0;
+    const auto& regs = maxwell3d.regs;
 
+    state.cull.enabled = regs.cull.enabled != 0;
     if (state.cull.enabled) {
         state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
         state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
@@ -1012,16 +1094,21 @@ void RasterizerOpenGL::SyncDepthTestState() {
     state.depth.test_enabled = regs.depth_test_enable != 0;
     state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
 
-    if (!state.depth.test_enabled)
+    if (!state.depth.test_enabled) {
         return;
+    }
 
     state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
 }
 
 void RasterizerOpenGL::SyncStencilTestState() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
-    state.stencil.test_enabled = regs.stencil_enable != 0;
+    auto& maxwell3d = system.GPU().Maxwell3D();
+    if (!maxwell3d.dirty.stencil_test) {
+        return;
+    }
+    const auto& regs = maxwell3d.regs;
 
+    state.stencil.test_enabled = regs.stencil_enable != 0;
     if (!regs.stencil_enable) {
         return;
     }
@@ -1050,10 +1137,17 @@ void RasterizerOpenGL::SyncStencilTestState() {
1050 state.stencil.back.action_depth_fail = GL_KEEP; 1137 state.stencil.back.action_depth_fail = GL_KEEP;
1051 state.stencil.back.action_depth_pass = GL_KEEP; 1138 state.stencil.back.action_depth_pass = GL_KEEP;
1052 } 1139 }
1140 state.MarkDirtyStencilState();
1141 maxwell3d.dirty.stencil_test = false;
1053} 1142}
1054 1143
1055void RasterizerOpenGL::SyncColorMask() { 1144void RasterizerOpenGL::SyncColorMask() {
1056 const auto& regs = system.GPU().Maxwell3D().regs; 1145 auto& maxwell3d = system.GPU().Maxwell3D();
1146 if (!maxwell3d.dirty.color_mask) {
1147 return;
1148 }
1149 const auto& regs = maxwell3d.regs;
1150
1057 const std::size_t count = 1151 const std::size_t count =
1058 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; 1152 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
1059 for (std::size_t i = 0; i < count; i++) { 1153 for (std::size_t i = 0; i < count; i++) {
@@ -1064,6 +1158,9 @@ void RasterizerOpenGL::SyncColorMask() {
1064 dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; 1158 dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
1065 dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; 1159 dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
1066 } 1160 }
1161
1162 state.MarkDirtyColorMask();
1163 maxwell3d.dirty.color_mask = false;
1067} 1164}
1068 1165
1069void RasterizerOpenGL::SyncMultiSampleState() { 1166void RasterizerOpenGL::SyncMultiSampleState() {
@@ -1078,7 +1175,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
1078} 1175}
1079 1176
1080void RasterizerOpenGL::SyncBlendState() { 1177void RasterizerOpenGL::SyncBlendState() {
1081 const auto& regs = system.GPU().Maxwell3D().regs; 1178 auto& maxwell3d = system.GPU().Maxwell3D();
1179 if (!maxwell3d.dirty.blend_state) {
1180 return;
1181 }
1182 const auto& regs = maxwell3d.regs;
1082 1183
1083 state.blend_color.red = regs.blend_color.r; 1184 state.blend_color.red = regs.blend_color.r;
1084 state.blend_color.green = regs.blend_color.g; 1185 state.blend_color.green = regs.blend_color.g;
@@ -1101,6 +1202,8 @@ void RasterizerOpenGL::SyncBlendState() {
1101 for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 1202 for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
1102 state.blend[i].enabled = false; 1203 state.blend[i].enabled = false;
1103 } 1204 }
1205 maxwell3d.dirty.blend_state = false;
1206 state.MarkDirtyBlendState();
1104 return; 1207 return;
1105 } 1208 }
1106 1209
@@ -1117,6 +1220,9 @@ void RasterizerOpenGL::SyncBlendState() {
1117 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); 1220 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
1118 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); 1221 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
1119 } 1222 }
1223
1224 state.MarkDirtyBlendState();
1225 maxwell3d.dirty.blend_state = false;
1120} 1226}
1121 1227
1122void RasterizerOpenGL::SyncLogicOpState() { 1228void RasterizerOpenGL::SyncLogicOpState() {
@@ -1168,13 +1274,21 @@ void RasterizerOpenGL::SyncPointState() {
1168} 1274}
1169 1275
1170void RasterizerOpenGL::SyncPolygonOffset() { 1276void RasterizerOpenGL::SyncPolygonOffset() {
1171 const auto& regs = system.GPU().Maxwell3D().regs; 1277 auto& maxwell3d = system.GPU().Maxwell3D();
1278 if (!maxwell3d.dirty.polygon_offset) {
1279 return;
1280 }
1281 const auto& regs = maxwell3d.regs;
1282
1172 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; 1283 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1173 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; 1284 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1174 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; 1285 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
1175 state.polygon_offset.units = regs.polygon_offset_units; 1286 state.polygon_offset.units = regs.polygon_offset_units;
1176 state.polygon_offset.factor = regs.polygon_offset_factor; 1287 state.polygon_offset.factor = regs.polygon_offset_factor;
1177 state.polygon_offset.clamp = regs.polygon_offset_clamp; 1288 state.polygon_offset.clamp = regs.polygon_offset_clamp;
1289
1290 state.MarkDirtyPolygonOffset();
1291 maxwell3d.dirty.polygon_offset = false;
1178} 1292}
1179 1293
1180void RasterizerOpenGL::SyncAlphaTest() { 1294void RasterizerOpenGL::SyncAlphaTest() {
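
The Sync* methods above all follow one pattern: consult a per-register-group dirty flag on the 3D engine, return early when nothing changed, otherwise rebuild that slice of the tracked GL state, mark the matching GL-side group dirty, and clear the engine flag. A minimal sketch of that flow with hypothetical Engine and StateTracker types (not the emulator's real classes):

#include <iostream>

struct Engine {
    struct {
        bool blend_state = true; // set by the command processor when blend registers are written
    } dirty;
    struct {
        float blend_color[4] = {0.f, 0.f, 0.f, 1.f};
    } regs;
};

struct StateTracker {
    float blend_color[4] = {};
    bool blend_dirty = false;
    void MarkDirtyBlendState() { blend_dirty = true; }
};

// Mirrors SyncBlendState: skip untouched groups, otherwise copy the registers
// and propagate the dirtiness to the GL-side tracker.
void SyncBlendState(Engine& engine, StateTracker& state) {
    if (!engine.dirty.blend_state) {
        return; // registers unchanged since the last draw, nothing to re-read
    }
    for (int i = 0; i < 4; ++i) {
        state.blend_color[i] = engine.regs.blend_color[i];
    }
    state.MarkDirtyBlendState();
    engine.dirty.blend_state = false;
}

int main() {
    Engine engine;
    StateTracker state;
    SyncBlendState(engine, state); // copies once
    SyncBlendState(engine, state); // early-returns, flag already cleared
    std::cout << std::boolalpha << state.blend_dirty << '\n'; // true
}
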
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index b2b671230..8b123c48d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -109,6 +109,9 @@ private:
109 OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true, 109 OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
110 bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); 110 bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
111 111
112 void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
113 bool using_depth_fb, bool using_stencil_fb);
114
112 /// Configures the current constbuffers to use for the draw command. 115 /// Configures the current constbuffers to use for the draw command.
113 void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 116 void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
114 const Shader& shader); 117 const Shader& shader);
@@ -227,6 +230,7 @@ private:
227 GLuint SetupVertexFormat(); 230 GLuint SetupVertexFormat();
228 231
229 void SetupVertexBuffer(GLuint vao); 232 void SetupVertexBuffer(GLuint vao);
233 void SetupVertexInstances(GLuint vao);
230 234
231 GLintptr SetupIndexBuffer(); 235 GLintptr SetupIndexBuffer();
232 236
@@ -237,6 +241,8 @@ private:
237 enum class AccelDraw { Disabled, Arrays, Indexed }; 241 enum class AccelDraw { Disabled, Arrays, Indexed };
238 AccelDraw accelerate_draw = AccelDraw::Disabled; 242 AccelDraw accelerate_draw = AccelDraw::Disabled;
239 243
244 OGLFramebuffer clear_framebuffer;
245
240 using CachedPageMap = boost::icl::interval_map<u64, int>; 246 using CachedPageMap = boost::icl::interval_map<u64, int>;
241 CachedPageMap cached_pages; 247 CachedPageMap cached_pages;
242}; 248};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 865c191bd..1c90facc3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -628,7 +628,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
628} 628}
629 629
630Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 630Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
631 if (!system.GPU().Maxwell3D().dirty_flags.shaders) { 631 if (!system.GPU().Maxwell3D().dirty.shaders) {
632 return last_shaders[static_cast<std::size_t>(program)]; 632 return last_shaders[static_cast<std::size_t>(program)];
633 } 633 }
634 634
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 50b616be4..ffe26b241 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -257,10 +257,6 @@ public:
257 } 257 }
258 258
259private: 259private:
260 using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
261 using OperationDecompilersArray =
262 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
263
264 void DeclareVertex() { 260 void DeclareVertex() {
265 if (!IsVertexShader(stage)) 261 if (!IsVertexShader(stage))
266 return; 262 return;
@@ -1414,14 +1410,10 @@ private:
1414 return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); 1410 return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
1415 } 1411 }
1416 1412
1417 std::string LogicalAll2(Operation operation) { 1413 std::string LogicalAnd2(Operation operation) {
1418 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); 1414 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
1419 } 1415 }
1420 1416
1421 std::string LogicalAny2(Operation operation) {
1422 return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
1423 }
1424
1425 template <bool with_nan> 1417 template <bool with_nan>
1426 std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { 1418 std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
1427 const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, 1419 const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
@@ -1728,7 +1720,7 @@ private:
1728 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; 1720 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
1729 } 1721 }
1730 1722
1731 static constexpr OperationDecompilersArray operation_decompilers = { 1723 static constexpr std::array operation_decompilers = {
1732 &GLSLDecompiler::Assign, 1724 &GLSLDecompiler::Assign,
1733 1725
1734 &GLSLDecompiler::Select, 1726 &GLSLDecompiler::Select,
@@ -1812,8 +1804,7 @@ private:
1812 &GLSLDecompiler::LogicalXor, 1804 &GLSLDecompiler::LogicalXor,
1813 &GLSLDecompiler::LogicalNegate, 1805 &GLSLDecompiler::LogicalNegate,
1814 &GLSLDecompiler::LogicalPick2, 1806 &GLSLDecompiler::LogicalPick2,
1815 &GLSLDecompiler::LogicalAll2, 1807 &GLSLDecompiler::LogicalAnd2,
1816 &GLSLDecompiler::LogicalAny2,
1817 1808
1818 &GLSLDecompiler::LogicalLessThan<Type::Float>, 1809 &GLSLDecompiler::LogicalLessThan<Type::Float>,
1819 &GLSLDecompiler::LogicalEqual<Type::Float>, 1810 &GLSLDecompiler::LogicalEqual<Type::Float>,
@@ -1877,6 +1868,7 @@ private:
1877 &GLSLDecompiler::WorkGroupId<1>, 1868 &GLSLDecompiler::WorkGroupId<1>,
1878 &GLSLDecompiler::WorkGroupId<2>, 1869 &GLSLDecompiler::WorkGroupId<2>,
1879 }; 1870 };
1871 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1880 1872
1881 std::string GetRegister(u32 index) const { 1873 std::string GetRegister(u32 index) const {
1882 return GetDeclarationWithSuffix(index, "gpr"); 1874 return GetDeclarationWithSuffix(index, "gpr");
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 0eae98afe..f4777d0b0 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -165,6 +165,25 @@ OpenGLState::OpenGLState() {
165 alpha_test.ref = 0.0f; 165 alpha_test.ref = 0.0f;
166} 166}
167 167
168void OpenGLState::SetDefaultViewports() {
169 for (auto& item : viewports) {
170 item.x = 0;
171 item.y = 0;
172 item.width = 0;
173 item.height = 0;
174 item.depth_range_near = 0.0f;
175 item.depth_range_far = 1.0f;
176 item.scissor.enabled = false;
177 item.scissor.x = 0;
178 item.scissor.y = 0;
179 item.scissor.width = 0;
180 item.scissor.height = 0;
181 }
182
183 depth_clamp.far_plane = false;
184 depth_clamp.near_plane = false;
185}
186
168void OpenGLState::ApplyDefaultState() { 187void OpenGLState::ApplyDefaultState() {
169 glEnable(GL_BLEND); 188 glEnable(GL_BLEND);
170 glDisable(GL_FRAMEBUFFER_SRGB); 189 glDisable(GL_FRAMEBUFFER_SRGB);
@@ -526,7 +545,7 @@ void OpenGLState::ApplySamplers() const {
526 } 545 }
527} 546}
528 547
529void OpenGLState::Apply() const { 548void OpenGLState::Apply() {
530 MICROPROFILE_SCOPE(OpenGL_State); 549 MICROPROFILE_SCOPE(OpenGL_State);
531 ApplyFramebufferState(); 550 ApplyFramebufferState();
532 ApplyVertexArrayState(); 551 ApplyVertexArrayState();
@@ -536,19 +555,31 @@ void OpenGLState::Apply() const {
536 ApplyPointSize(); 555 ApplyPointSize();
537 ApplyFragmentColorClamp(); 556 ApplyFragmentColorClamp();
538 ApplyMultisample(); 557 ApplyMultisample();
558 if (dirty.color_mask) {
559 ApplyColorMask();
560 dirty.color_mask = false;
561 }
539 ApplyDepthClamp(); 562 ApplyDepthClamp();
540 ApplyColorMask();
541 ApplyViewport(); 563 ApplyViewport();
542 ApplyStencilTest(); 564 if (dirty.stencil_state) {
565 ApplyStencilTest();
566 dirty.stencil_state = false;
567 }
543 ApplySRgb(); 568 ApplySRgb();
544 ApplyCulling(); 569 ApplyCulling();
545 ApplyDepth(); 570 ApplyDepth();
546 ApplyPrimitiveRestart(); 571 ApplyPrimitiveRestart();
547 ApplyBlending(); 572 if (dirty.blend_state) {
573 ApplyBlending();
574 dirty.blend_state = false;
575 }
548 ApplyLogicOp(); 576 ApplyLogicOp();
549 ApplyTextures(); 577 ApplyTextures();
550 ApplySamplers(); 578 ApplySamplers();
551 ApplyPolygonOffset(); 579 if (dirty.polygon_offset) {
580 ApplyPolygonOffset();
581 dirty.polygon_offset = false;
582 }
552 ApplyAlphaTest(); 583 ApplyAlphaTest();
553} 584}
554 585
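
Apply() above loses its const qualifier so it can clear the new dirty flags after re-submitting only the state groups that actually changed; the remaining groups still go through the old unconditional path. A stripped-down sketch of the gated apply, with hypothetical names and counters standing in for the real GL submissions:

#include <iostream>

struct GLStateSketch {
    int blend_applies = 0;
    int stencil_applies = 0;

    struct {
        bool blend_state = false;
        bool stencil_state = false;
    } dirty;

    void MarkDirtyBlendState() { dirty.blend_state = true; }
    void AllDirty() { dirty.blend_state = dirty.stencil_state = true; }

    void Apply() {
        if (dirty.blend_state) {
            ++blend_applies; // stands in for ApplyBlending()
            dirty.blend_state = false;
        }
        if (dirty.stencil_state) {
            ++stencil_applies; // stands in for ApplyStencilTest()
            dirty.stencil_state = false;
        }
    }
};

int main() {
    GLStateSketch state;
    state.MarkDirtyBlendState();
    state.Apply(); // submits blend only
    state.Apply(); // submits nothing, both flags are clear
    std::cout << state.blend_applies << ' ' << state.stencil_applies << '\n'; // 1 0
}
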
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b0140495d..fdf9a8a12 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -195,8 +195,9 @@ public:
195 s_rgb_used = false; 195 s_rgb_used = false;
196 } 196 }
197 197
198 void SetDefaultViewports();
198 /// Apply this state as the current OpenGL state 199 /// Apply this state as the current OpenGL state
199 void Apply() const; 200 void Apply();
200 201
201 void ApplyFramebufferState() const; 202 void ApplyFramebufferState() const;
202 void ApplyVertexArrayState() const; 203 void ApplyVertexArrayState() const;
@@ -237,11 +238,41 @@ public:
237 /// Viewport does not affects glClearBuffer so emulate viewport using scissor test 238 /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
238 void EmulateViewportWithScissor(); 239 void EmulateViewportWithScissor();
239 240
241 void MarkDirtyBlendState() {
242 dirty.blend_state = true;
243 }
244
245 void MarkDirtyStencilState() {
246 dirty.stencil_state = true;
247 }
248
249 void MarkDirtyPolygonOffset() {
250 dirty.polygon_offset = true;
251 }
252
253 void MarkDirtyColorMask() {
254 dirty.color_mask = true;
255 }
256
257 void AllDirty() {
258 dirty.blend_state = true;
259 dirty.stencil_state = true;
260 dirty.polygon_offset = true;
261 dirty.color_mask = true;
262 }
263
240private: 264private:
241 static OpenGLState cur_state; 265 static OpenGLState cur_state;
242 266
243 // Workaround for sRGB problems caused by QT not supporting srgb output 267 // Workaround for sRGB problems caused by QT not supporting srgb output
244 static bool s_rgb_used; 268 static bool s_rgb_used;
269 struct {
270 bool blend_state;
271 bool stencil_state;
272 bool viewport_state;
273 bool polygon_offset;
274 bool color_mask;
275 } dirty{};
245}; 276};
246 277
247} // namespace OpenGL 278} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index b1f6bc7c2..8fcd39a69 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -485,11 +485,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
485 const auto& dst_params{dst_view->GetSurfaceParams()}; 485 const auto& dst_params{dst_view->GetSurfaceParams()};
486 486
487 OpenGLState prev_state{OpenGLState::GetCurState()}; 487 OpenGLState prev_state{OpenGLState::GetCurState()};
488 SCOPE_EXIT({ prev_state.Apply(); }); 488 SCOPE_EXIT({
489 prev_state.AllDirty();
490 prev_state.Apply();
491 });
489 492
490 OpenGLState state; 493 OpenGLState state;
491 state.draw.read_framebuffer = src_framebuffer.handle; 494 state.draw.read_framebuffer = src_framebuffer.handle;
492 state.draw.draw_framebuffer = dst_framebuffer.handle; 495 state.draw.draw_framebuffer = dst_framebuffer.handle;
496 state.AllDirty();
493 state.Apply(); 497 state.Apply();
494 498
495 u32 buffers{}; 499 u32 buffers{};
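
The blit path snapshots the tracked state and restores it on scope exit, but now flags everything dirty first: the raw framebuffer work happens outside the tracker, so its cached view of the GL context cannot be trusted and the restore has to re-submit every group. The presentation and screenshot paths below use the same AllDirty-before-Apply idiom. A minimal sketch of the scope-guard restore, with a hypothetical ScopeExit helper standing in for the project's SCOPE_EXIT macro:

#include <iostream>

struct TrackedState {
    bool applied_everything = false;
    void AllDirty() { /* mark every state group dirty */ }
    void Apply() { applied_everything = true; /* re-submit all groups */ }
};

// Hypothetical RAII helper: runs the stored callable when the scope ends.
template <typename F>
struct ScopeExit {
    F func;
    ~ScopeExit() { func(); }
};
template <typename F>
ScopeExit(F) -> ScopeExit<F>;

void Blit(TrackedState& prev_state) {
    ScopeExit restore{[&] {
        prev_state.AllDirty(); // cached state is stale after the raw GL calls
        prev_state.Apply();
    }};
    // ... bind scratch framebuffers and issue the blit here ...
}

int main() {
    TrackedState prev;
    Blit(prev);
    std::cout << std::boolalpha << prev.applied_everything << '\n'; // true
}
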
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 9ecdddb0d..a05cef3b9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -108,6 +108,7 @@ void RendererOpenGL::SwapBuffers(
108 108
109 // Maintain the rasterizer's state as a priority 109 // Maintain the rasterizer's state as a priority
110 OpenGLState prev_state = OpenGLState::GetCurState(); 110 OpenGLState prev_state = OpenGLState::GetCurState();
111 state.AllDirty();
111 state.Apply(); 112 state.Apply();
112 113
113 if (framebuffer) { 114 if (framebuffer) {
@@ -140,6 +141,7 @@ void RendererOpenGL::SwapBuffers(
140 system.GetPerfStats().BeginSystemFrame(); 141 system.GetPerfStats().BeginSystemFrame();
141 142
142 // Restore the rasterizer state 143 // Restore the rasterizer state
144 prev_state.AllDirty();
143 prev_state.Apply(); 145 prev_state.Apply();
144} 146}
145 147
@@ -206,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() {
206 // Link shaders and get variable locations 208 // Link shaders and get variable locations
207 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); 209 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
208 state.draw.shader_program = shader.handle; 210 state.draw.shader_program = shader.handle;
211 state.AllDirty();
209 state.Apply(); 212 state.Apply();
210 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); 213 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
211 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); 214 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
@@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
338 // Workaround brigthness problems in SMO by enabling sRGB in the final output 341 // Workaround brigthness problems in SMO by enabling sRGB in the final output
339 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 342 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
340 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); 343 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
344 state.AllDirty();
341 state.Apply(); 345 state.Apply();
342 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); 346 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
343 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 347 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
344 // Restore default state 348 // Restore default state
345 state.framebuffer_srgb.enabled = false; 349 state.framebuffer_srgb.enabled = false;
346 state.texture_units[0].texture = 0; 350 state.texture_units[0].texture = 0;
351 state.AllDirty();
347 state.Apply(); 352 state.Apply();
348 // Clear sRGB state for the next frame 353 // Clear sRGB state for the next frame
349 OpenGLState::ClearsRGBUsed(); 354 OpenGLState::ClearsRGBUsed();
@@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() {
388 GLuint old_read_fb = state.draw.read_framebuffer; 393 GLuint old_read_fb = state.draw.read_framebuffer;
389 GLuint old_draw_fb = state.draw.draw_framebuffer; 394 GLuint old_draw_fb = state.draw.draw_framebuffer;
390 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; 395 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
396 state.AllDirty();
391 state.Apply(); 397 state.Apply();
392 398
393 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; 399 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() {
407 screenshot_framebuffer.Release(); 413 screenshot_framebuffer.Release();
408 state.draw.read_framebuffer = old_read_fb; 414 state.draw.read_framebuffer = old_read_fb;
409 state.draw.draw_framebuffer = old_draw_fb; 415 state.draw.draw_framebuffer = old_draw_fb;
416 state.AllDirty();
410 state.Apply(); 417 state.Apply();
411 glDeleteRenderbuffers(1, &renderbuffer); 418 glDeleteRenderbuffers(1, &renderbuffer);
412 419
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 9b2d8e987..d267712c9 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -205,10 +205,6 @@ public:
205 } 205 }
206 206
207private: 207private:
208 using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
209 using OperationDecompilersArray =
210 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
211
212 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); 208 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
213 209
214 void AllocateBindings() { 210 void AllocateBindings() {
@@ -804,12 +800,7 @@ private:
804 return {}; 800 return {};
805 } 801 }
806 802
807 Id LogicalAll2(Operation operation) { 803 Id LogicalAnd2(Operation operation) {
808 UNIMPLEMENTED();
809 return {};
810 }
811
812 Id LogicalAny2(Operation operation) {
813 UNIMPLEMENTED(); 804 UNIMPLEMENTED();
814 return {}; 805 return {};
815 } 806 }
@@ -1206,7 +1197,7 @@ private:
1206 return {}; 1197 return {};
1207 } 1198 }
1208 1199
1209 static constexpr OperationDecompilersArray operation_decompilers = { 1200 static constexpr std::array operation_decompilers = {
1210 &SPIRVDecompiler::Assign, 1201 &SPIRVDecompiler::Assign,
1211 1202
1212 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, 1203 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
@@ -1291,8 +1282,7 @@ private:
1291 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, 1282 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
1292 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, 1283 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
1293 &SPIRVDecompiler::LogicalPick2, 1284 &SPIRVDecompiler::LogicalPick2,
1294 &SPIRVDecompiler::LogicalAll2, 1285 &SPIRVDecompiler::LogicalAnd2,
1295 &SPIRVDecompiler::LogicalAny2,
1296 1286
1297 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, 1287 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
1298 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, 1288 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
@@ -1357,6 +1347,7 @@ private:
1357 &SPIRVDecompiler::WorkGroupId<1>, 1347 &SPIRVDecompiler::WorkGroupId<1>,
1358 &SPIRVDecompiler::WorkGroupId<2>, 1348 &SPIRVDecompiler::WorkGroupId<2>,
1359 }; 1349 };
1350 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1360 1351
1361 const VKDevice& device; 1352 const VKDevice& device;
1362 const ShaderIR& ir; 1353 const ShaderIR& ir;
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 29c8895c5..afffd157f 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -46,12 +46,12 @@ void ShaderIR::Decode() {
46 coverage_end = shader_info.end; 46 coverage_end = shader_info.end;
47 if (shader_info.decompilable) { 47 if (shader_info.decompilable) {
48 disable_flow_stack = true; 48 disable_flow_stack = true;
49 const auto insert_block = ([this](NodeBlock& nodes, u32 label) { 49 const auto insert_block = [this](NodeBlock& nodes, u32 label) {
50 if (label == exit_branch) { 50 if (label == exit_branch) {
51 return; 51 return;
52 } 52 }
53 basic_blocks.insert({label, nodes}); 53 basic_blocks.insert({label, nodes});
54 }); 54 };
55 const auto& blocks = shader_info.blocks; 55 const auto& blocks = shader_info.blocks;
56 NodeBlock current_block; 56 NodeBlock current_block;
57 u32 current_label = exit_branch; 57 u32 current_label = exit_branch;
@@ -103,7 +103,7 @@ void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
103} 103}
104 104
105void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { 105void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
106 const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { 106 const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
107 Node result = n; 107 Node result = n;
108 if (cond.cc != ConditionCode::T) { 108 if (cond.cc != ConditionCode::T) {
109 result = Conditional(GetConditionCode(cond.cc), {result}); 109 result = Conditional(GetConditionCode(cond.cc), {result});
@@ -117,7 +117,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
117 result = Conditional(GetPredicate(pred, is_neg), {result}); 117 result = Conditional(GetPredicate(pred, is_neg), {result});
118 } 118 }
119 return result; 119 return result;
120 }); 120 };
121 if (block.branch.address < 0) { 121 if (block.branch.address < 0) {
122 if (block.branch.kills) { 122 if (block.branch.kills) {
123 Node n = Operation(OperationCode::Discard); 123 Node n = Operation(OperationCode::Discard);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index d59d15bd8..ad180d6df 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -23,38 +23,51 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); 23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); 24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
25 25
26 Node op_b = [&]() { 26 Tegra::Shader::PredCondition cond{};
27 switch (opcode->get().GetId()) { 27 bool h_and{};
28 case OpCode::Id::HSETP2_R: 28 Node op_b{};
29 return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, 29 switch (opcode->get().GetId()) {
30 instr.hsetp2.negate_b); 30 case OpCode::Id::HSETP2_C:
31 default: 31 cond = instr.hsetp2.cbuf_and_imm.cond;
32 UNREACHABLE(); 32 h_and = instr.hsetp2.cbuf_and_imm.h_and;
33 return Immediate(0); 33 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
34 } 34 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
35 }(); 35 break;
36 op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); 36 case OpCode::Id::HSETP2_IMM:
37 37 cond = instr.hsetp2.cbuf_and_imm.cond;
38 // We can't use the constant predicate as destination. 38 h_and = instr.hsetp2.cbuf_and_imm.h_and;
39 ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); 39 op_b = UnpackHalfImmediate(instr, true);
40 40 break;
41 const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); 41 case OpCode::Id::HSETP2_R:
42 cond = instr.hsetp2.reg.cond;
43 h_and = instr.hsetp2.reg.h_and;
44 op_b =
45 UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b,
46 instr.hsetp2.reg.negate_b),
47 instr.hsetp2.reg.type_b);
48 break;
49 default:
50 UNREACHABLE();
51 op_b = Immediate(0);
52 }
42 53
43 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); 54 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
44 const OperationCode pair_combiner = 55 const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
45 instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
46
47 const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
48 const Node first_pred = Operation(pair_combiner, comparison);
49 56
50 // Set the primary predicate to the result of Predicate OP SecondPredicate 57 const auto Write = [&](u64 dest, Node src) {
51 const Node value = Operation(combiner, first_pred, second_pred); 58 SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39));
52 SetPredicate(bb, instr.hsetp2.pred3, value); 59 };
53 60
54 if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 61 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
55 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled 62 const u64 first = instr.hsetp2.pred0;
56 const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); 63 const u64 second = instr.hsetp2.pred3;
57 SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); 64 if (h_and) {
65 const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
66 Write(first, joined);
67 Write(second, Operation(OperationCode::LogicalNegate, joined));
68 } else {
69 Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u)));
70 Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u)));
58 } 71 }
59 72
60 return pc; 73 return pc;
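
The HSETP2 rewrite above keys both destination predicates off the instruction's h_and bit: when it is set, the two half-comparisons are reduced with LogicalAnd2 and the second predicate receives the negation; otherwise LogicalPick2 splits the bool2 so each predicate gets one lane, and either way the result is still combined with pred39. A minimal sketch of that selection over a plain bool pair (hypothetical helper, not the IR types):

#include <array>
#include <iostream>
#include <utility>

using Bool2 = std::array<bool, 2>; // result of comparing both fp16 lanes

// Returns the values written to pred0 and pred3, before the combiner with pred39.
std::pair<bool, bool> WritePredicates(Bool2 comparison, bool h_and) {
    if (h_and) {
        const bool joined = comparison[0] && comparison[1]; // LogicalAnd2
        return {joined, !joined};                           // second predicate is the negation
    }
    // LogicalPick2: each predicate takes one lane of the comparison.
    return {comparison[0], comparison[1]};
}

int main() {
    const auto [p0, p3] = WritePredicates({true, false}, /*h_and=*/true);
    std::cout << std::boolalpha << p0 << ' ' << p3 << '\n'; // false true
}
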
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 24f022cc0..77151a24b 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image
95const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, 95const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
96 Tegra::Shader::ImageType type) { 96 Tegra::Shader::ImageType type) {
97 const Node image_register{GetRegister(reg)}; 97 const Node image_register{GetRegister(reg)};
98 const Node base_image{ 98 const auto [base_image, cbuf_index, cbuf_offset]{
99 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; 99 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
100 const auto cbuf{std::get_if<CbufNode>(&*base_image)};
101 const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())};
102 const auto cbuf_offset{cbuf_offset_imm->GetValue()};
103 const auto cbuf_index{cbuf->GetIndex()};
104 const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; 100 const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
105 101
106 // If this image has already been used, return the existing mapping. 102 // If this image has already been used, return the existing mapping.
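
TrackCbuf now returns the tracked node together with the constant-buffer index and offset as a tuple, so the bindless image and sampler paths (and the global-memory path below) can use structured bindings instead of unpacking CbufNode and ImmediateNode by hand. A minimal sketch of the shape of that API and of the 64-bit cache key built from it, using hypothetical stand-in types:

#include <cstdint>
#include <iostream>
#include <tuple>

struct Node {}; // stand-in for the tracked IR node

// Stand-in for TrackCbuf: the real function walks the IR; here the values are canned.
std::tuple<const Node*, std::uint32_t, std::uint32_t> TrackCbuf(const Node&) {
    static const Node tracked{};
    return {&tracked, /*cbuf_index=*/3u, /*cbuf_offset=*/0x48u};
}

int main() {
    const Node source;
    const auto [base, cbuf_index, cbuf_offset] = TrackCbuf(source);
    // Same packing as the bindless caches: index in the high half, offset in the low half.
    const std::uint64_t cbuf_key =
        (static_cast<std::uint64_t>(cbuf_index) << 32) | static_cast<std::uint64_t>(cbuf_offset);
    std::cout << std::hex << cbuf_key << '\n'; // 300000048
    return base != nullptr ? 0 : 1;
}
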
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 80fc0ccfc..ed108bea8 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
95 const Node op_b = 95 const Node op_b =
96 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); 96 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
97 97
98 SetTemporal(bb, 0, op_a); 98 SetTemporary(bb, 0, op_a);
99 SetTemporal(bb, 1, op_b); 99 SetTemporary(bb, 1, op_b);
100 SetRegister(bb, instr.gpr0, GetTemporal(0)); 100 SetRegister(bb, instr.gpr0, GetTemporary(0));
101 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); 101 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
102 break; 102 break;
103 } 103 }
104 default: 104 default:
@@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
136 } 136 }
137 }(); 137 }();
138 for (u32 i = 0; i < count; ++i) 138 for (u32 i = 0; i < count; ++i)
139 SetTemporal(bb, i, GetLmem(i * 4)); 139 SetTemporary(bb, i, GetLmem(i * 4));
140 for (u32 i = 0; i < count; ++i) 140 for (u32 i = 0; i < count; ++i)
141 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 141 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
142 break; 142 break;
143 } 143 }
144 default: 144 default:
@@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
172 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); 172 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
173 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 173 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
174 174
175 SetTemporal(bb, i, gmem); 175 SetTemporary(bb, i, gmem);
176 } 176 }
177 for (u32 i = 0; i < count; ++i) { 177 for (u32 i = 0; i < count; ++i) {
178 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 178 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
179 } 179 }
180 break; 180 break;
181 } 181 }
@@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
253 TrackAndGetGlobalMemory(bb, instr, true); 253 TrackAndGetGlobalMemory(bb, instr, true);
254 254
255 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} 255 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
256 SetTemporal(bb, 0, real_address_base); 256 SetTemporary(bb, 0, real_address_base);
257 257
258 const u32 count = GetUniformTypeElementsCount(type); 258 const u32 count = GetUniformTypeElementsCount(type);
259 for (u32 i = 0; i < count; ++i) { 259 for (u32 i = 0; i < count; ++i) {
260 SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); 260 SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
261 } 261 }
262 for (u32 i = 0; i < count; ++i) { 262 for (u32 i = 0; i < count; ++i) {
263 const Node it_offset = Immediate(i * 4); 263 const Node it_offset = Immediate(i * 4);
@@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
265 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); 265 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
266 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 266 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
267 267
268 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); 268 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
269 } 269 }
270 break; 270 break;
271 } 271 }
@@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB
297 const auto addr_register{GetRegister(instr.gmem.gpr)}; 297 const auto addr_register{GetRegister(instr.gmem.gpr)};
298 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; 298 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
299 299
300 const Node base_address{ 300 const auto [base_address, index, offset] =
301 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; 301 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
302 const auto cbuf = std::get_if<CbufNode>(&*base_address); 302 ASSERT(base_address != nullptr);
303 ASSERT(cbuf != nullptr);
304 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
305 ASSERT(cbuf_offset_imm != nullptr);
306 const auto cbuf_offset = cbuf_offset_imm->GetValue();
307 303
308 bb.push_back( 304 bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
309 Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
310 305
311 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; 306 const GlobalMemoryBase descriptor{index, offset};
312 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); 307 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
313 auto& usage = entry->second; 308 auto& usage = entry->second;
314 if (is_write) { 309 if (is_write) {
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 42e3de02f..c0f64d7a0 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -102,7 +102,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
102 PRECISE, op_a, Immediate(3)); 102 PRECISE, op_a, Immediate(3));
103 const Node operand = 103 const Node operand =
104 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); 104 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
105 branch = Operation(OperationCode::BranchIndirect, convert); 105 branch = Operation(OperationCode::BranchIndirect, operand);
106 } 106 }
107 107
108 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 108 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 323be3f14..0b934a069 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
181 const Node value = 181 const Node value =
182 Operation(OperationCode::TextureQueryDimensions, meta, 182 Operation(OperationCode::TextureQueryDimensions, meta,
183 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); 183 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
184 SetTemporal(bb, indexer++, value); 184 SetTemporary(bb, indexer++, value);
185 } 185 }
186 for (u32 i = 0; i < indexer; ++i) { 186 for (u32 i = 0; i < indexer; ++i) {
187 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 187 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
188 } 188 }
189 break; 189 break;
190 } 190 }
@@ -238,10 +238,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
238 auto params = coords; 238 auto params = coords;
239 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; 239 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
240 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); 240 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
241 SetTemporal(bb, indexer++, value); 241 SetTemporary(bb, indexer++, value);
242 } 242 }
243 for (u32 i = 0; i < indexer; ++i) { 243 for (u32 i = 0; i < indexer; ++i) {
244 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 244 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
245 } 245 }
246 break; 246 break;
247 } 247 }
@@ -308,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
308const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, 308const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
309 bool is_array, bool is_shadow) { 309 bool is_array, bool is_shadow) {
310 const Node sampler_register = GetRegister(reg); 310 const Node sampler_register = GetRegister(reg);
311 const Node base_sampler = 311 const auto [base_sampler, cbuf_index, cbuf_offset] =
312 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); 312 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
313 const auto cbuf = std::get_if<CbufNode>(&*base_sampler); 313 ASSERT(base_sampler != nullptr);
314 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
315 ASSERT(cbuf_offset_imm != nullptr);
316 const auto cbuf_offset = cbuf_offset_imm->GetValue();
317 const auto cbuf_index = cbuf->GetIndex();
318 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); 314 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
319 315
320 // If this sampler has already been used, return the existing mapping. 316 // If this sampler has already been used, return the existing mapping.
@@ -340,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
340 // Skip disabled components 336 // Skip disabled components
341 continue; 337 continue;
342 } 338 }
343 SetTemporal(bb, dest_elem++, components[elem]); 339 SetTemporary(bb, dest_elem++, components[elem]);
344 } 340 }
345 // After writing values in temporals, move them to the real registers 341 // After writing values in temporals, move them to the real registers
346 for (u32 i = 0; i < dest_elem; ++i) { 342 for (u32 i = 0; i < dest_elem; ++i) {
347 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 343 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
348 } 344 }
349} 345}
350 346
@@ -357,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
357 for (u32 component = 0; component < 4; ++component) { 353 for (u32 component = 0; component < 4; ++component) {
358 if (!instr.texs.IsComponentEnabled(component)) 354 if (!instr.texs.IsComponentEnabled(component))
359 continue; 355 continue;
360 SetTemporal(bb, dest_elem++, components[component]); 356 SetTemporary(bb, dest_elem++, components[component]);
361 } 357 }
362 358
363 for (u32 i = 0; i < dest_elem; ++i) { 359 for (u32 i = 0; i < dest_elem; ++i) {
364 if (i < 2) { 360 if (i < 2) {
365 // Write the first two swizzle components to gpr0 and gpr0+1 361 // Write the first two swizzle components to gpr0 and gpr0+1
366 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); 362 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
367 } else { 363 } else {
368 ASSERT(instr.texs.HasTwoDestinations()); 364 ASSERT(instr.texs.HasTwoDestinations());
369 // Write the rest of the swizzle components to gpr28 and gpr28+1 365 // Write the rest of the swizzle components to gpr28 and gpr28+1
370 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); 366 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
371 } 367 }
372 } 368 }
373} 369}
@@ -395,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
395 return; 391 return;
396 } 392 }
397 393
398 SetTemporal(bb, 0, first_value); 394 SetTemporary(bb, 0, first_value);
399 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); 395 SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
400 396
401 SetRegister(bb, instr.gpr0, GetTemporal(0)); 397 SetRegister(bb, instr.gpr0, GetTemporary(0));
402 SetRegister(bb, instr.gpr28, GetTemporal(1)); 398 SetRegister(bb, instr.gpr28, GetTemporary(1));
403} 399}
404 400
405Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, 401Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
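
The texture writers above, like the XMAD and memory decoders, stage their results in temporaries and only then copy them into the destination registers; committing outputs directly could clobber a register that a later component of the same instruction still reads as input. A minimal sketch of that hazard and of the two-phase fix on a plain register file (hypothetical, not the IR):

#include <array>
#include <cstddef>
#include <iostream>

using Registers = std::array<int, 8>;

// Each output component is "computed" from a source register; if the destination
// range overlaps the sources, writing results directly corrupts later reads.
void WriteDirect(Registers& regs, std::size_t dest, std::size_t src, std::size_t count) {
    for (std::size_t i = 0; i < count; ++i) {
        regs[dest + i] = regs[src + i] * 10; // may read a value written by a previous iteration
    }
}

void WriteViaTemporaries(Registers& regs, std::size_t dest, std::size_t src, std::size_t count) {
    std::array<int, 8> temporaries{};
    for (std::size_t i = 0; i < count; ++i) {
        temporaries[i] = regs[src + i] * 10; // stage everything first
    }
    for (std::size_t i = 0; i < count; ++i) {
        regs[dest + i] = temporaries[i]; // then commit to the real registers
    }
}

int main() {
    Registers a{1, 2, 3, 4}, b = a;
    WriteDirect(a, /*dest=*/1, /*src=*/0, /*count=*/3);        // reg1 is overwritten, then read
    WriteViaTemporaries(b, /*dest=*/1, /*src=*/0, /*count=*/3);
    std::cout << a[2] << ' ' << b[2] << '\n'; // 100 vs 20
}
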
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 93dee77d1..206961909 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
73 if (is_psl) { 73 if (is_psl) {
74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); 74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
75 } 75 }
76 SetTemporal(bb, 0, product); 76 SetTemporary(bb, 0, product);
77 product = GetTemporal(0); 77 product = GetTemporary(0);
78 78
79 const Node original_c = op_c; 79 const Node original_c = op_c;
80 const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error 80 const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
@@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
98 } 98 }
99 }(); 99 }();
100 100
101 SetTemporal(bb, 1, op_c); 101 SetTemporary(bb, 1, op_c);
102 op_c = GetTemporal(1); 102 op_c = GetTemporary(1);
103 103
104 // TODO(Rodrigo): Use an appropiate sign for this operation 104 // TODO(Rodrigo): Use an appropiate sign for this operation
105 Node sum = Operation(OperationCode::IAdd, product, op_c); 105 Node sum = Operation(OperationCode::IAdd, product, op_c);
106 SetTemporal(bb, 2, sum); 106 SetTemporary(bb, 2, sum);
107 sum = GetTemporal(2); 107 sum = GetTemporary(2);
108 if (is_merge) { 108 if (is_merge) {
109 const Node a = BitfieldExtract(sum, 0, 16); 109 const Node a = BitfieldExtract(sum, 0, 16);
110 const Node b = 110 const Node b =
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 7427ed896..715184d67 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -101,8 +101,7 @@ enum class OperationCode {
101 LogicalXor, /// (bool a, bool b) -> bool 101 LogicalXor, /// (bool a, bool b) -> bool
102 LogicalNegate, /// (bool a) -> bool 102 LogicalNegate, /// (bool a) -> bool
103 LogicalPick2, /// (bool2 pair, uint index) -> bool 103 LogicalPick2, /// (bool2 pair, uint index) -> bool
104 LogicalAll2, /// (bool2 a) -> bool 104 LogicalAnd2, /// (bool2 a) -> bool
105 LogicalAny2, /// (bool2 a) -> bool
106 105
107 LogicalFLessThan, /// (float a, float b) -> bool 106 LogicalFLessThan, /// (float a, float b) -> bool
108 LogicalFEqual, /// (float a, float b) -> bool 107 LogicalFEqual, /// (float a, float b) -> bool
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 6fccbbba3..b3dcd291c 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -12,7 +12,7 @@
12namespace VideoCommon::Shader { 12namespace VideoCommon::Shader {
13 13
14Node Conditional(Node condition, std::vector<Node> code) { 14Node Conditional(Node condition, std::vector<Node> code) {
15 return MakeNode<ConditionalNode>(condition, std::move(code)); 15 return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
16} 16}
17 17
18Node Comment(std::string text) { 18Node Comment(std::string text) {
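
Conditional() and the ShaderIR helpers that follow now std::move their Node arguments into MakeNode. Node behaves like a shared pointer, so passing the last use by value without a move costs an extra atomic reference-count bump that is immediately undone. A minimal sketch of the difference, assuming a std::shared_ptr-based handle (the real Node alias may differ):

#include <iostream>
#include <memory>
#include <utility>

using Node = std::shared_ptr<int>; // assumed stand-in for the IR handle

// Takes ownership by value, like MakeNode's parameters.
long UseCountInside(Node node) {
    return node.use_count();
}

int main() {
    Node a = std::make_shared<int>(42);
    std::cout << UseCountInside(a) << '\n';            // 2: caller plus the copied parameter
    std::cout << UseCountInside(std::move(a)) << '\n'; // 1: ownership transferred, no ref-count churn
}
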
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index caa409788..5e91fe129 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -61,8 +61,17 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
61 const auto [entry, is_new] = used_cbufs.try_emplace(index); 61 const auto [entry, is_new] = used_cbufs.try_emplace(index);
62 entry->second.MarkAsUsedIndirect(); 62 entry->second.MarkAsUsedIndirect();
63 63
64 const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); 64 Node final_offset = [&] {
65 return MakeNode<CbufNode>(index, final_offset); 65 // Attempt to inline constant buffer without a variable offset. This is done to allow
66 // tracking LDC calls.
67 if (const auto gpr = std::get_if<GprNode>(&*node)) {
68 if (gpr->GetIndex() == Register::ZeroIndex) {
69 return Immediate(offset);
70 }
71 }
72 return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
73 }();
74 return MakeNode<CbufNode>(index, std::move(final_offset));
66} 75}
67 76
68Node ShaderIR::GetPredicate(u64 pred_, bool negated) { 77Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
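
GetConstBufferIndirect now folds the offset into a plain immediate when the index register is RZ, the always-zero register, instead of emitting an UAdd of zero; as the new inline comment notes, a constant offset is what lets later passes track LDC accesses back to a concrete const-buffer slot. A minimal sketch of that peephole over a toy node type (hypothetical, not the IR's Node):

#include <cstdint>
#include <iostream>
#include <variant>

constexpr std::uint32_t ZeroIndex = 255; // RZ: always reads as zero

struct Gpr { std::uint32_t index; };
struct Immediate { std::uint32_t value; };
struct Add { std::uint32_t gpr_index; std::uint32_t immediate; };
using ToyNode = std::variant<Gpr, Immediate, Add>;

// Mirrors the new final_offset lambda: inline the offset when the register is RZ.
ToyNode MakeCbufOffset(const Gpr& reg, std::uint32_t offset) {
    if (reg.index == ZeroIndex) {
        return Immediate{offset}; // trackable constant offset
    }
    return Add{reg.index, offset}; // genuinely indirect access
}

int main() {
    const ToyNode folded = MakeCbufOffset(Gpr{ZeroIndex}, 0x140);
    const ToyNode dynamic = MakeCbufOffset(Gpr{7}, 0x140);
    std::cout << std::boolalpha << std::holds_alternative<Immediate>(folded) << ' '
              << std::holds_alternative<Add>(dynamic) << '\n'; // true true
}
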
@@ -80,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) {
80 89
81Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { 90Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
82 used_input_attributes.emplace(index); 91 used_input_attributes.emplace(index);
83 return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); 92 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
84} 93}
85 94
86Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { 95Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
@@ -113,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
113 } 122 }
114 used_output_attributes.insert(index); 123 used_output_attributes.insert(index);
115 124
116 return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); 125 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
117} 126}
118 127
119Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { 128Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
@@ -125,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
125} 134}
126 135
127Node ShaderIR::GetLocalMemory(Node address) { 136Node ShaderIR::GetLocalMemory(Node address) {
128 return MakeNode<LmemNode>(address); 137 return MakeNode<LmemNode>(std::move(address));
129} 138}
130 139
131Node ShaderIR::GetTemporal(u32 id) { 140Node ShaderIR::GetTemporary(u32 id) {
132 return GetRegister(Register::ZeroIndex + 1 + id); 141 return GetRegister(Register::ZeroIndex + 1 + id);
133} 142}
134 143
135Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { 144Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
136 if (absolute) { 145 if (absolute) {
137 value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); 146 value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
138 } 147 }
139 if (negate) { 148 if (negate) {
140 value = Operation(OperationCode::FNegate, NO_PRECISE, value); 149 value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
141 } 150 }
142 return value; 151 return value;
143} 152}
@@ -146,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
146 if (!saturate) { 155 if (!saturate) {
147 return value; 156 return value;
148 } 157 }
149 const Node positive_zero = Immediate(std::copysignf(0, 1)); 158
150 const Node positive_one = Immediate(1.0f); 159 Node positive_zero = Immediate(std::copysignf(0, 1));
151 return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); 160 Node positive_one = Immediate(1.0f);
161 return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
162 std::move(positive_one));
152} 163}
153 164
154Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { 165Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
155 switch (size) { 166 switch (size) {
156 case Register::Size::Byte: 167 case Register::Size::Byte:
157 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, 168 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
158 Immediate(24)); 169 std::move(value), Immediate(24));
159 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, 170 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
160 Immediate(24)); 171 std::move(value), Immediate(24));
161 return value; 172 return value;
162 case Register::Size::Short: 173 case Register::Size::Short:
163 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, 174 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
164 Immediate(16)); 175 std::move(value), Immediate(16));
165 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, 176 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
166 Immediate(16)); 177 std::move(value), Immediate(16));
167 case Register::Size::Word: 178 case Register::Size::Word:
168 // Default - do nothing 179 // Default - do nothing
169 return value; 180 return value;
@@ -179,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b
179 return value; 190 return value;
180 } 191 }
181 if (absolute) { 192 if (absolute) {
182 value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); 193 value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
183 } 194 }
184 if (negate) { 195 if (negate) {
185 value = Operation(OperationCode::INegate, NO_PRECISE, value); 196 value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
186 } 197 }
187 return value; 198 return value;
188} 199}
189 200
190Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { 201Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
191 const Node value = Immediate(instr.half_imm.PackImmediates()); 202 Node value = Immediate(instr.half_imm.PackImmediates());
192 if (!has_negation) { 203 if (!has_negation) {
193 return value; 204 return value;
194 } 205 }
195 const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
196 const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
197 206
198 return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); 207 Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
208 Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
209
210 return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
211 std::move(second_negate));
199} 212}
200 213
201Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { 214Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
202 return Operation(OperationCode::HUnpack, type, value); 215 return Operation(OperationCode::HUnpack, type, std::move(value));
203} 216}
204 217
205Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { 218Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
@@ -207,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
207 case Tegra::Shader::HalfMerge::H0_H1: 220 case Tegra::Shader::HalfMerge::H0_H1:
208 return src; 221 return src;
209 case Tegra::Shader::HalfMerge::F32: 222 case Tegra::Shader::HalfMerge::F32:
210 return Operation(OperationCode::HMergeF32, src); 223 return Operation(OperationCode::HMergeF32, std::move(src));
211 case Tegra::Shader::HalfMerge::Mrg_H0: 224 case Tegra::Shader::HalfMerge::Mrg_H0:
212 return Operation(OperationCode::HMergeH0, dest, src); 225 return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
213 case Tegra::Shader::HalfMerge::Mrg_H1: 226 case Tegra::Shader::HalfMerge::Mrg_H1:
214 return Operation(OperationCode::HMergeH1, dest, src); 227 return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
215 } 228 }
216 UNREACHABLE(); 229 UNREACHABLE();
217 return src; 230 return src;
@@ -219,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
219 232
220Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { 233Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
221 if (absolute) { 234 if (absolute) {
222 value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); 235 value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
223 } 236 }
224 if (negate) { 237 if (negate) {
225 value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), 238 value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
226 GetPredicate(true)); 239 GetPredicate(true));
227 } 240 }
228 return value; 241 return value;
@@ -232,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
232 if (!saturate) { 245 if (!saturate) {
233 return value; 246 return value;
234 } 247 }
235 const Node positive_zero = Immediate(std::copysignf(0, 1)); 248
236 const Node positive_one = Immediate(1.0f); 249 Node positive_zero = Immediate(std::copysignf(0, 1));
237 return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); 250 Node positive_one = Immediate(1.0f);
251 return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
252 std::move(positive_one));
238} 253}
239 254
240Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { 255Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
@@ -262,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
262 condition == PredCondition::LessEqualWithNan || 277 condition == PredCondition::LessEqualWithNan ||
263 condition == PredCondition::GreaterThanWithNan || 278 condition == PredCondition::GreaterThanWithNan ||
264 condition == PredCondition::GreaterEqualWithNan) { 279 condition == PredCondition::GreaterEqualWithNan) {
265
266 predicate = Operation(OperationCode::LogicalOr, predicate, 280 predicate = Operation(OperationCode::LogicalOr, predicate,
267 Operation(OperationCode::LogicalFIsNan, op_a)); 281 Operation(OperationCode::LogicalFIsNan, op_a));
268 predicate = Operation(OperationCode::LogicalOr, predicate, 282 predicate = Operation(OperationCode::LogicalOr, predicate,
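Aside from the dropped blank line, the *WithNan handling is unchanged: the base comparison is OR'd with an is-NaN test on each operand. A scalar sketch of the same logic, with an illustrative name:

    #include <cmath>

    // NaN operands force the predicate to true instead of the IEEE
    // "unordered compares false" result.
    bool LessThanWithNan(float op_a, float op_b) {
        bool predicate = op_a < op_b;
        predicate = predicate || std::isnan(op_a);
        predicate = predicate || std::isnan(op_b);
        return predicate;
    }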
@@ -291,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
291 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 305 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
292 "Unknown predicate comparison operation"); 306 "Unknown predicate comparison operation");
293 307
294 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b); 308 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
309 std::move(op_b));
295 310
296 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || 311 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
297 condition == PredCondition::NotEqualWithNan || 312 condition == PredCondition::NotEqualWithNan ||
@@ -321,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition
321 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 336 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
322 "Unknown predicate comparison operation"); 337 "Unknown predicate comparison operation");
323 338
324 const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); 339 return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
325
326 return predicate;
327} 340}
328 341
329OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { 342OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
@@ -349,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
349} 362}
350 363
351void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { 364void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
352 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); 365 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
353} 366}
354 367
355void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { 368void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
356 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); 369 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
357} 370}
358 371
359void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { 372void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
360 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); 373 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
361} 374}
362 375
363void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { 376void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
364 bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); 377 bb.push_back(
378 Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
365} 379}
366 380
367void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { 381void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
368 SetRegister(bb, Register::ZeroIndex + 1 + id, value); 382 SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
369} 383}
370 384
371void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { 385void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
372 if (!sets_cc) { 386 if (!sets_cc) {
373 return; 387 return;
374 } 388 }
375 const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); 389 Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f));
376 SetInternalFlag(bb, InternalFlag::Zero, zerop); 390 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
377 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 391 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
378} 392}
379 393
@@ -381,14 +395,14 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_
381 if (!sets_cc) { 395 if (!sets_cc) {
382 return; 396 return;
383 } 397 }
384 const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); 398 Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
385 SetInternalFlag(bb, InternalFlag::Zero, zerop); 399 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
386 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 400 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
387} 401}
388 402
389Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { 403Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
390 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), 404 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
391 Immediate(bits)); 405 Immediate(offset), Immediate(bits));
392} 406}
393 407
394} // namespace VideoCommon::Shader 408} // namespace VideoCommon::Shader
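The recurring edit in shader_ir.cpp is mechanical: Node is a reference-counted handle, so operands that used to be copied into Operation() are now moved, saving an atomic reference-count round trip per argument. A minimal sketch of the pattern with stand-in types (not the real IR classes):

    #include <memory>
    #include <utility>

    struct NodeData {};                      // stand-in for the variant in node.h
    using Node = std::shared_ptr<NodeData>;  // assumed shared_ptr-based handle

    // Hypothetical sink with the same shape as Operation(...): it keeps its operand.
    Node MakeUnary(Node operand) {
        return operand;                      // by-value parameter, implicitly moved out
    }

    Node Negate(Node value) {
        // Old style: MakeUnary(value) copies the handle (one ref-count up/down pair).
        // New style: the caller's last use of 'value' is handed over without a copy.
        return MakeUnary(std::move(value));
    }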
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 03c888def..59a083d90 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,13 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cstring>
9#include <map> 8#include <map>
10#include <optional> 9#include <optional>
11#include <set> 10#include <set>
12#include <string>
13#include <tuple> 11#include <tuple>
14#include <variant>
15#include <vector> 12#include <vector>
16 13
17#include "common/common_types.h" 14#include "common/common_types.h"
@@ -210,8 +207,8 @@ private:
210 Node GetInternalFlag(InternalFlag flag, bool negated = false); 207 Node GetInternalFlag(InternalFlag flag, bool negated = false);
211 /// Generates a node representing a local memory address 208 /// Generates a node representing a local memory address
212 Node GetLocalMemory(Node address); 209 Node GetLocalMemory(Node address);
213 /// Generates a temporal, internally it uses a post-RZ register 210 /// Generates a temporary; internally it uses a post-RZ register
214 Node GetTemporal(u32 id); 211 Node GetTemporary(u32 id);
215 212
216 /// Sets a register. src value must be a number-evaluated node. 213 /// Sets a register. src value must be a number-evaluated node.
217 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); 214 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
@@ -221,8 +218,8 @@ private:
221 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); 218 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
222 /// Sets a local memory address. address and value must be number-evaluated nodes 219 /// Sets a local memory address. address and value must be number-evaluated nodes
223 void SetLocalMemory(NodeBlock& bb, Node address, Node value); 220 void SetLocalMemory(NodeBlock& bb, Node address, Node value);
224 /// Sets a temporal. Internally it uses a post-RZ register 221 /// Sets a temporary. Internally it uses a post-RZ register
225 void SetTemporal(NodeBlock& bb, u32 id, Node value); 222 void SetTemporary(NodeBlock& bb, u32 id, Node value);
226 223
227 /// Sets internal flags from a float 224 /// Sets internal flags from a float
228 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); 225 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
@@ -328,7 +325,7 @@ private:
328 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, 325 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
329 Node op_c, Node imm_lut, bool sets_cc); 326 Node op_c, Node imm_lut, bool sets_cc);
330 327
331 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; 328 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
332 329
333 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; 330 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
334 331
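The renamed Get/SetTemporary helpers keep the same mapping as before: temporary id N lives in the register right after RZ, as SetTemporary's use of Register::ZeroIndex + 1 + id in shader_ir.cpp shows. A tiny sketch of that mapping, with the RZ index assumed to be 255:

    #include <cstdint>

    constexpr std::uint32_t ZeroIndex = 255;  // assumed value of Register::ZeroIndex (RZ)

    // "Post-RZ" register backing a given temporary id.
    constexpr std::uint32_t TemporaryRegister(std::uint32_t id) {
        return ZeroIndex + 1 + id;
    }

    static_assert(TemporaryRegister(0) == 256, "first temporary sits right after RZ");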
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index fc957d980..a53e02253 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -15,56 +15,63 @@ namespace {
15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, 15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
16 OperationCode operation_code) { 16 OperationCode operation_code) {
17 for (; cursor >= 0; --cursor) { 17 for (; cursor >= 0; --cursor) {
18 const Node node = code.at(cursor); 18 Node node = code.at(cursor);
19
19 if (const auto operation = std::get_if<OperationNode>(&*node)) { 20 if (const auto operation = std::get_if<OperationNode>(&*node)) {
20 if (operation->GetCode() == operation_code) { 21 if (operation->GetCode() == operation_code) {
21 return {node, cursor}; 22 return {std::move(node), cursor};
22 } 23 }
23 } 24 }
25
24 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { 26 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
25 const auto& conditional_code = conditional->GetCode(); 27 const auto& conditional_code = conditional->GetCode();
26 const auto [found, internal_cursor] = FindOperation( 28 auto [found, internal_cursor] = FindOperation(
27 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); 29 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
28 if (found) { 30 if (found) {
29 return {found, cursor}; 31 return {std::move(found), cursor};
30 } 32 }
31 } 33 }
32 } 34 }
33 return {}; 35 return {};
34} 36}
35} // namespace 37} // Anonymous namespace
36 38
37Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { 39std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
40 s64 cursor) const {
38 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 41 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
39 // Cbuf found, but it has to be immediate 42 // Constant buffer found; check whether its offset is an immediate
40 return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; 43 const auto offset = cbuf->GetOffset();
44 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
45 return {tracked, cbuf->GetIndex(), immediate->GetValue()};
46 }
47 return {};
41 } 48 }
42 if (const auto gpr = std::get_if<GprNode>(&*tracked)) { 49 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
43 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { 50 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
44 return nullptr; 51 return {};
45 } 52 }
46 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same 53 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
47 // register that it uses as an operand 54 // register that it uses as an operand
48 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); 55 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
49 if (!source) { 56 if (!source) {
50 return nullptr; 57 return {};
51 } 58 }
52 return TrackCbuf(source, code, new_cursor); 59 return TrackCbuf(source, code, new_cursor);
53 } 60 }
54 if (const auto operation = std::get_if<OperationNode>(&*tracked)) { 61 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
55 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { 62 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
56 if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { 63 if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) {
57 // Cbuf found in operand 64 // Cbuf found in operand.
58 return found; 65 return found;
59 } 66 }
60 } 67 }
61 return nullptr; 68 return {};
62 } 69 }
63 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { 70 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
64 const auto& conditional_code = conditional->GetCode(); 71 const auto& conditional_code = conditional->GetCode();
65 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); 72 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
66 } 73 }
67 return nullptr; 74 return {};
68} 75}
69 76
70std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { 77std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
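With the new signature, TrackCbuf reports the constant buffer index and immediate offset alongside the tracked node, and an empty tuple (null Node) plays the role the old nullptr return did. A hedged sketch of how a caller is expected to consume it; the stub and consumer below only mirror the shape of the return value:

    #include <cstdint>
    #include <memory>
    #include <tuple>

    using u32 = std::uint32_t;
    struct NodeData {};
    using Node = std::shared_ptr<NodeData>;

    // Stand-in with the same return shape as ShaderIR::TrackCbuf.
    std::tuple<Node, u32, u32> TrackCbufStub() {
        return {};                           // "not found": Node is null, indices are zero
    }

    void UseConstBuffer(u32 /*index*/, u32 /*offset*/) {}  // hypothetical consumer

    void ConsumeTrackedCbuf() {
        const auto [node, cbuf_index, cbuf_offset] = TrackCbufStub();
        if (!node) {
            return;                          // same null test callers performed before
        }
        // Index and offset come straight from the tuple, no re-inspection of 'node'.
        UseConstBuffer(cbuf_index, cbuf_offset);
    }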
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 7f9623c62..a3a3770a7 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -116,10 +116,10 @@ public:
116 std::lock_guard lock{mutex}; 116 std::lock_guard lock{mutex};
117 auto& maxwell3d = system.GPU().Maxwell3D(); 117 auto& maxwell3d = system.GPU().Maxwell3D();
118 118
119 if (!maxwell3d.dirty_flags.zeta_buffer) { 119 if (!maxwell3d.dirty.depth_buffer) {
120 return depth_buffer.view; 120 return depth_buffer.view;
121 } 121 }
122 maxwell3d.dirty_flags.zeta_buffer = false; 122 maxwell3d.dirty.depth_buffer = false;
123 123
124 const auto& regs{maxwell3d.regs}; 124 const auto& regs{maxwell3d.regs};
125 const auto gpu_addr{regs.zeta.Address()}; 125 const auto gpu_addr{regs.zeta.Address()};
@@ -145,10 +145,10 @@ public:
145 std::lock_guard lock{mutex}; 145 std::lock_guard lock{mutex};
146 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 146 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
147 auto& maxwell3d = system.GPU().Maxwell3D(); 147 auto& maxwell3d = system.GPU().Maxwell3D();
148 if (!maxwell3d.dirty_flags.color_buffer[index]) { 148 if (!maxwell3d.dirty.render_target[index]) {
149 return render_targets[index].view; 149 return render_targets[index].view;
150 } 150 }
151 maxwell3d.dirty_flags.color_buffer.reset(index); 151 maxwell3d.dirty.render_target[index] = false;
152 152
153 const auto& regs{maxwell3d.regs}; 153 const auto& regs{maxwell3d.regs};
154 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || 154 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
@@ -274,10 +274,11 @@ protected:
274 auto& maxwell3d = system.GPU().Maxwell3D(); 274 auto& maxwell3d = system.GPU().Maxwell3D();
275 const u32 index = surface->GetRenderTarget(); 275 const u32 index = surface->GetRenderTarget();
276 if (index == DEPTH_RT) { 276 if (index == DEPTH_RT) {
277 maxwell3d.dirty_flags.zeta_buffer = true; 277 maxwell3d.dirty.depth_buffer = true;
278 } else { 278 } else {
279 maxwell3d.dirty_flags.color_buffer.set(index, true); 279 maxwell3d.dirty.render_target[index] = true;
280 } 280 }
281 maxwell3d.dirty.render_settings = true;
281 } 282 }
282 283
283 void Register(TSurface surface) { 284 void Register(TSurface surface) {
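On the texture-cache side, the old dirty_flags.zeta_buffer flag and color_buffer bitset give way to a dirty struct with plain per-target booleans, and invalidating any attachment now also raises dirty.render_settings so the framebuffer setup is revisited. The real layout lives in maxwell_3d.h and is not part of this hunk; the sketch below only assumes the three fields referenced here:

    #include <array>
    #include <cstddef>

    // Assumed shape of the new dirty-state grouping; the render target count of 8 is
    // the usual Maxwell3D value and is an assumption in this sketch.
    struct DirtyRegs {
        static constexpr std::size_t NumRenderTargets = 8;

        bool depth_buffer = true;                            // was dirty_flags.zeta_buffer
        std::array<bool, NumRenderTargets> render_target{};  // was the color_buffer bitset
        bool render_settings = true;                         // new: forces framebuffer re-setup
    };

    // Mirrors MarkAsModified above: touching any attachment also flags render_settings.
    void MarkRenderTargetModified(DirtyRegs& dirty, std::size_t index, bool is_depth) {
        if (is_depth) {
            dirty.depth_buffer = true;
        } else {
            dirty.render_target[index] = true;
        }
        dirty.render_settings = true;
    }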