summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/hle/kernel/process.cpp29
-rw-r--r--src/core/hle/kernel/process.h11
-rw-r--r--src/core/hle/kernel/svc.cpp4
-rw-r--r--src/video_core/dma_pusher.cpp2
-rw-r--r--src/video_core/engines/kepler_compute.cpp9
-rw-r--r--src/video_core/engines/kepler_memory.cpp2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp305
-rw-r--r--src/video_core/engines/maxwell_3d.h134
-rw-r--r--src/video_core/engines/maxwell_dma.cpp2
-rw-r--r--src/video_core/engines/shader_bytecode.h26
-rw-r--r--src/video_core/gpu.cpp8
-rw-r--r--src/video_core/gpu.h6
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp290
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h21
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp152
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp90
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h22
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h33
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp39
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp41
-rw-r--r--src/video_core/renderer_opengl/gl_state.h33
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp6
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp17
-rw-r--r--src/video_core/shader/decode.cpp8
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp69
-rw-r--r--src/video_core/shader/decode/memory.cpp22
-rw-r--r--src/video_core/shader/decode/texture.cpp26
-rw-r--r--src/video_core/shader/decode/xmad.cpp12
-rw-r--r--src/video_core/shader/node.h3
-rw-r--r--src/video_core/shader/node_helper.cpp2
-rw-r--r--src/video_core/shader/shader_ir.cpp111
-rw-r--r--src/video_core/shader/shader_ir.h11
-rw-r--r--src/video_core/shader/track.cpp10
-rw-r--r--src/video_core/texture_cache/texture_cache.h13
40 files changed, 1188 insertions, 432 deletions
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index db3ab14ce..92169a97b 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -184,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
184} 184}
185 185
186void Process::Run(s32 main_thread_priority, u64 stack_size) { 186void Process::Run(s32 main_thread_priority, u64 stack_size) {
187 // The kernel always ensures that the given stack size is page aligned. 187 AllocateMainThreadStack(stack_size);
188 main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); 188 tls_region_address = CreateTLSRegion();
189
190 // Allocate and map the main thread stack
191 // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
192 // of the user address space.
193 const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
194 vm_manager
195 .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
196 0, main_thread_stack_size, MemoryState::Stack)
197 .Unwrap();
198 189
199 vm_manager.LogLayout(); 190 vm_manager.LogLayout();
191
200 ChangeStatus(ProcessStatus::Running); 192 ChangeStatus(ProcessStatus::Running);
201 193
202 SetupMainThread(*this, kernel, main_thread_priority); 194 SetupMainThread(*this, kernel, main_thread_priority);
@@ -226,6 +218,9 @@ void Process::PrepareForTermination() {
226 stop_threads(system.Scheduler(2).GetThreadList()); 218 stop_threads(system.Scheduler(2).GetThreadList());
227 stop_threads(system.Scheduler(3).GetThreadList()); 219 stop_threads(system.Scheduler(3).GetThreadList());
228 220
221 FreeTLSRegion(tls_region_address);
222 tls_region_address = 0;
223
229 ChangeStatus(ProcessStatus::Exited); 224 ChangeStatus(ProcessStatus::Exited);
230} 225}
231 226
@@ -325,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) {
325 WakeupAllWaitingThreads(); 320 WakeupAllWaitingThreads();
326} 321}
327 322
323void Process::AllocateMainThreadStack(u64 stack_size) {
324 // The kernel always ensures that the given stack size is page aligned.
325 main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
326
327 // Allocate and map the main thread stack
328 const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
329 vm_manager
330 .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
331 0, main_thread_stack_size, MemoryState::Stack)
332 .Unwrap();
333}
334
328} // namespace Kernel 335} // namespace Kernel
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 3196014da..c2df451f3 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -135,6 +135,11 @@ public:
135 return mutex; 135 return mutex;
136 } 136 }
137 137
138 /// Gets the address to the process' dedicated TLS region.
139 VAddr GetTLSRegionAddress() const {
140 return tls_region_address;
141 }
142
138 /// Gets the current status of the process 143 /// Gets the current status of the process
139 ProcessStatus GetStatus() const { 144 ProcessStatus GetStatus() const {
140 return status; 145 return status;
@@ -296,6 +301,9 @@ private:
296 /// a process signal. 301 /// a process signal.
297 void ChangeStatus(ProcessStatus new_status); 302 void ChangeStatus(ProcessStatus new_status);
298 303
304 /// Allocates the main thread stack for the process, given the stack size in bytes.
305 void AllocateMainThreadStack(u64 stack_size);
306
299 /// Memory manager for this process. 307 /// Memory manager for this process.
300 Kernel::VMManager vm_manager; 308 Kernel::VMManager vm_manager;
301 309
@@ -358,6 +366,9 @@ private:
358 /// variable related facilities. 366 /// variable related facilities.
359 Mutex mutex; 367 Mutex mutex;
360 368
369 /// Address indicating the location of the process' dedicated TLS region.
370 VAddr tls_region_address = 0;
371
361 /// Random values for svcGetInfo RandomEntropy 372 /// Random values for svcGetInfo RandomEntropy
362 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{}; 373 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{};
363 374
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 0687839ff..1fd1a732a 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -843,9 +843,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
843 return RESULT_SUCCESS; 843 return RESULT_SUCCESS;
844 844
845 case GetInfoType::UserExceptionContextAddr: 845 case GetInfoType::UserExceptionContextAddr:
846 LOG_WARNING(Kernel_SVC, 846 *result = process->GetTLSRegionAddress();
847 "(STUBBED) Attempted to query user exception context address, returned 0");
848 *result = 0;
849 return RESULT_SUCCESS; 847 return RESULT_SUCCESS;
850 848
851 case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: 849 case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 3175579cc..bd036cbe8 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
22 MICROPROFILE_SCOPE(DispatchCalls); 22 MICROPROFILE_SCOPE(DispatchCalls);
23 23
24 // On entering GPU code, assume all memory may be touched by the ARM core. 24 // On entering GPU code, assume all memory may be touched by the ARM core.
25 gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); 25 gpu.Maxwell3D().dirty.OnMemoryWrite();
26 26
27 dma_pushbuffer_subindex = 0; 27 dma_pushbuffer_subindex = 0;
28 28
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 7404a8163..08586d33c 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
37 const bool is_last_call = method_call.IsLastCall(); 37 const bool is_last_call = method_call.IsLastCall();
38 upload_state.ProcessData(method_call.argument, is_last_call); 38 upload_state.ProcessData(method_call.argument, is_last_call);
39 if (is_last_call) { 39 if (is_last_call) {
40 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 40 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
41 } 41 }
42 break; 42 break;
43 } 43 }
@@ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
50} 50}
51 51
52void KeplerCompute::ProcessLaunch() { 52void KeplerCompute::ProcessLaunch() {
53
54 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); 53 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
55 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, 54 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
56 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); 55 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
57 56
58 const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; 57 const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
59 LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); 58 LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
59
60 rasterizer.DispatchCompute(code_addr);
60} 61}
61 62
62} // namespace Tegra::Engines 63} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 0561f676c..44279de00 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
34 const bool is_last_call = method_call.IsLastCall(); 34 const bool is_last_call = method_call.IsLastCall();
35 upload_state.ProcessData(method_call.argument, is_last_call); 35 upload_state.ProcessData(method_call.argument, is_last_call);
36 if (is_last_call) { 36 if (is_last_call) {
37 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 37 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
38 } 38 }
39 break; 39 break;
40 } 40 }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 8755b8af4..74c46ec04 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
22 MemoryManager& memory_manager) 22 MemoryManager& memory_manager)
23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, 23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { 24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
25 InitDirtySettings();
25 InitializeRegisterDefaults(); 26 InitializeRegisterDefaults();
26} 27}
27 28
@@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
69 regs.stencil_back_func_mask = 0xFFFFFFFF; 70 regs.stencil_back_func_mask = 0xFFFFFFFF;
70 regs.stencil_back_mask = 0xFFFFFFFF; 71 regs.stencil_back_mask = 0xFFFFFFFF;
71 72
73 regs.depth_test_func = Regs::ComparisonOp::Always;
74 regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
75 regs.cull.cull_face = Regs::Cull::CullFace::Back;
76
72 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a 77 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
73 // register carrying a default value. Assume it's OpenGL's default (1). 78 // register carrying a default value. Assume it's OpenGL's default (1).
74 regs.point_size = 1.0f; 79 regs.point_size = 1.0f;
@@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() {
86 regs.rt_separate_frag_data = 1; 91 regs.rt_separate_frag_data = 1;
87} 92}
88 93
94#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
95
96void Maxwell3D::InitDirtySettings() {
97 const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
98 const auto start_itr = dirty_pointers.begin() + start;
99 const auto end_itr = start_itr + range;
100 std::fill(start_itr, end_itr, position);
101 };
102 dirty.regs.fill(true);
103
104 // Init Render Targets
105 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
106 constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
107 constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
108 u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
109 for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
110 set_block(rt_reg, registers_per_rt, rt_dirty_reg);
111 rt_dirty_reg++;
112 }
113 constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
114 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
115 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
116 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
117 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
118 constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
119 set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
120
121 // Init Vertex Arrays
122 constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
123 constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
124 constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
125 u32 va_reg = DIRTY_REGS_POS(vertex_array);
126 u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
127 for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
128 vertex_reg += vertex_array_size) {
129 set_block(vertex_reg, 3, va_reg);
130 // The divisor concerns vertex array instances
131 dirty_pointers[vertex_reg + 3] = vi_reg;
132 va_reg++;
133 vi_reg++;
134 }
135 constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
136 constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
137 constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
138 va_reg = DIRTY_REGS_POS(vertex_array);
139 for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
140 vertex_reg += vertex_limit_size) {
141 set_block(vertex_reg, vertex_limit_size, va_reg);
142 va_reg++;
143 }
144 constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
145 constexpr u32 vertex_instance_size =
146 sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
147 constexpr u32 vertex_instance_end =
148 vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
149 vi_reg = DIRTY_REGS_POS(vertex_instance);
150 for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
151 vertex_reg += vertex_instance_size) {
152 set_block(vertex_reg, vertex_instance_size, vi_reg);
153 vi_reg++;
154 }
155 set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
156 DIRTY_REGS_POS(vertex_attrib_format));
157
158 // Init Shaders
159 constexpr u32 shader_registers_count =
160 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
161 set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
162 DIRTY_REGS_POS(shaders));
163
164 // State
165
166 // Viewport
167 constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
168 constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
169 constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
170 set_block(viewport_start, viewport_size, viewport_dirty_reg);
171 constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
172 constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
173 set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
174
175 // Viewport transformation
176 constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
177 constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
178 set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
179
180 // Cullmode
181 constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
182 constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
183 set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
184
185 // Screen y control
186 dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
187
188 // Primitive Restart
189 constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
190 constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
191 set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
192
193 // Depth Test
194 constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
195 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
196 dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
197 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
198
199 // Stencil Test
200 constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
201 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
202 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
203 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
204 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
205 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
206 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
207 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
208 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
209 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
210 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
211 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
212 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
213 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
214 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
215 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
216 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
217
218 // Color Mask
219 constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
220 dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
221 set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
222 color_mask_dirty_reg);
223 // Blend State
224 constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
225 set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
226 blend_state_dirty_reg);
227 dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
228 set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
229 set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
230 blend_state_dirty_reg);
231
232 // Scissor State
233 constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
234 set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
235 scissor_test_dirty_reg);
236
237 // Polygon Offset
238 constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
239 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
240 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
241 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
242 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
243 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
244 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
245}
246
89void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { 247void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
90 // Reset the current macro. 248 // Reset the current macro.
91 executing_macro = 0; 249 executing_macro = 0;
@@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
108 266
109 const u32 method = method_call.method; 267 const u32 method = method_call.method;
110 268
269 if (method == cb_data_state.current) {
270 regs.reg_array[method] = method_call.argument;
271 ProcessCBData(method_call.argument);
272 return;
273 } else if (cb_data_state.current != null_cb_data) {
274 FinishCBData();
275 }
276
111 // It is an error to write to a register other than the current macro's ARG register before it 277 // It is an error to write to a register other than the current macro's ARG register before it
112 // has finished execution. 278 // has finished execution.
113 if (executing_macro != 0) { 279 if (executing_macro != 0) {
@@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
143 309
144 if (regs.reg_array[method] != method_call.argument) { 310 if (regs.reg_array[method] != method_call.argument) {
145 regs.reg_array[method] = method_call.argument; 311 regs.reg_array[method] = method_call.argument;
146 // Color buffers 312 const std::size_t dirty_reg = dirty_pointers[method];
147 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); 313 if (dirty_reg) {
148 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 314 dirty.regs[dirty_reg] = true;
149 if (method >= first_rt_reg && 315 if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
150 method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { 316 dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
151 const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; 317 dirty.vertex_array_buffers = true;
152 dirty_flags.color_buffer.set(rt_index); 318 } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
153 } 319 dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
154 320 dirty.vertex_instances = true;
155 // Zeta buffer 321 } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
156 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); 322 dirty_reg < DIRTY_REGS_POS(render_settings)) {
157 if (method == MAXWELL3D_REG_INDEX(zeta_enable) || 323 dirty.render_settings = true;
158 method == MAXWELL3D_REG_INDEX(zeta_width) || 324 }
159 method == MAXWELL3D_REG_INDEX(zeta_height) ||
160 (method >= MAXWELL3D_REG_INDEX(zeta) &&
161 method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
162 dirty_flags.zeta_buffer = true;
163 }
164
165 // Shader
166 constexpr u32 shader_registers_count =
167 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
168 if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
169 method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
170 dirty_flags.shaders = true;
171 }
172
173 // Vertex format
174 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
175 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
176 dirty_flags.vertex_attrib_format = true;
177 }
178
179 // Vertex buffer
180 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
181 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
182 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
183 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
184 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
185 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
186 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
187 method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
188 dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
189 } 325 }
190 } 326 }
191 327
@@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
214 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): 350 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
215 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): 351 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
216 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { 352 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
217 ProcessCBData(method_call.argument); 353 StartCBData(method);
218 break; 354 break;
219 } 355 }
220 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { 356 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
@@ -249,6 +385,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
249 ProcessQueryGet(); 385 ProcessQueryGet();
250 break; 386 break;
251 } 387 }
388 case MAXWELL3D_REG_INDEX(condition.mode): {
389 ProcessQueryCondition();
390 break;
391 }
252 case MAXWELL3D_REG_INDEX(sync_info): { 392 case MAXWELL3D_REG_INDEX(sync_info): {
253 ProcessSyncPoint(); 393 ProcessSyncPoint();
254 break; 394 break;
@@ -261,7 +401,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
261 const bool is_last_call = method_call.IsLastCall(); 401 const bool is_last_call = method_call.IsLastCall();
262 upload_state.ProcessData(method_call.argument, is_last_call); 402 upload_state.ProcessData(method_call.argument, is_last_call);
263 if (is_last_call) { 403 if (is_last_call) {
264 dirty_flags.OnMemoryWrite(); 404 dirty.OnMemoryWrite();
265 } 405 }
266 break; 406 break;
267 } 407 }
@@ -302,6 +442,7 @@ void Maxwell3D::ProcessQueryGet() {
302 result = regs.query.query_sequence; 442 result = regs.query.query_sequence;
303 break; 443 break;
304 default: 444 default:
445 result = 1;
305 UNIMPLEMENTED_MSG("Unimplemented query select type {}", 446 UNIMPLEMENTED_MSG("Unimplemented query select type {}",
306 static_cast<u32>(regs.query.query_get.select.Value())); 447 static_cast<u32>(regs.query.query_get.select.Value()));
307 } 448 }
@@ -333,7 +474,6 @@ void Maxwell3D::ProcessQueryGet() {
333 query_result.timestamp = system.CoreTiming().GetTicks(); 474 query_result.timestamp = system.CoreTiming().GetTicks();
334 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); 475 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
335 } 476 }
336 dirty_flags.OnMemoryWrite();
337 break; 477 break;
338 } 478 }
339 default: 479 default:
@@ -342,6 +482,45 @@ void Maxwell3D::ProcessQueryGet() {
342 } 482 }
343} 483}
344 484
485void Maxwell3D::ProcessQueryCondition() {
486 const GPUVAddr condition_address{regs.condition.Address()};
487 switch (regs.condition.mode) {
488 case Regs::ConditionMode::Always: {
489 execute_on = true;
490 break;
491 }
492 case Regs::ConditionMode::Never: {
493 execute_on = false;
494 break;
495 }
496 case Regs::ConditionMode::ResNonZero: {
497 Regs::QueryCompare cmp;
498 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
499 execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
500 break;
501 }
502 case Regs::ConditionMode::Equal: {
503 Regs::QueryCompare cmp;
504 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
505 execute_on =
506 cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
507 break;
508 }
509 case Regs::ConditionMode::NotEqual: {
510 Regs::QueryCompare cmp;
511 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
512 execute_on =
513 cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
514 break;
515 }
516 default: {
517 UNIMPLEMENTED_MSG("Uninplemented Condition Mode!");
518 execute_on = true;
519 break;
520 }
521 }
522}
523
345void Maxwell3D::ProcessSyncPoint() { 524void Maxwell3D::ProcessSyncPoint() {
346 const u32 sync_point = regs.sync_info.sync_point.Value(); 525 const u32 sync_point = regs.sync_info.sync_point.Value();
347 const u32 increment = regs.sync_info.increment.Value(); 526 const u32 increment = regs.sync_info.increment.Value();
@@ -405,23 +584,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
405} 584}
406 585
407void Maxwell3D::ProcessCBData(u32 value) { 586void Maxwell3D::ProcessCBData(u32 value) {
587 const u32 id = cb_data_state.id;
588 cb_data_state.buffer[id][cb_data_state.counter] = value;
589 // Increment the current buffer position.
590 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
591 cb_data_state.counter++;
592}
593
594void Maxwell3D::StartCBData(u32 method) {
595 constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
596 cb_data_state.start_pos = regs.const_buffer.cb_pos;
597 cb_data_state.id = method - first_cb_data;
598 cb_data_state.current = method;
599 cb_data_state.counter = 0;
600 ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
601}
602
603void Maxwell3D::FinishCBData() {
408 // Write the input value to the current const buffer at the current position. 604 // Write the input value to the current const buffer at the current position.
409 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); 605 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
410 ASSERT(buffer_address != 0); 606 ASSERT(buffer_address != 0);
411 607
412 // Don't allow writing past the end of the buffer. 608 // Don't allow writing past the end of the buffer.
413 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); 609 ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
414
415 const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
416 610
417 u8* ptr{memory_manager.GetPointer(address)}; 611 const GPUVAddr address{buffer_address + cb_data_state.start_pos};
418 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); 612 const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
419 memory_manager.Write<u32>(address, value);
420 613
421 dirty_flags.OnMemoryWrite(); 614 const u32 id = cb_data_state.id;
615 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
616 dirty.OnMemoryWrite();
422 617
423 // Increment the current buffer position. 618 cb_data_state.id = null_cb_data;
424 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; 619 cb_data_state.current = null_cb_data;
425} 620}
426 621
427Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 622Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 8d15c8a48..1ee982b76 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -90,6 +90,20 @@ public:
90 90
91 enum class QuerySelect : u32 { 91 enum class QuerySelect : u32 {
92 Zero = 0, 92 Zero = 0,
93 TimeElapsed = 2,
94 TransformFeedbackPrimitivesGenerated = 11,
95 PrimitivesGenerated = 18,
96 SamplesPassed = 21,
97 TransformFeedbackUnknown = 26,
98 };
99
100 struct QueryCompare {
101 u32 initial_sequence;
102 u32 initial_mode;
103 u32 unknown1;
104 u32 unknown2;
105 u32 current_sequence;
106 u32 current_mode;
93 }; 107 };
94 108
95 enum class QuerySyncCondition : u32 { 109 enum class QuerySyncCondition : u32 {
@@ -97,6 +111,14 @@ public:
97 GreaterThan = 1, 111 GreaterThan = 1,
98 }; 112 };
99 113
114 enum class ConditionMode : u32 {
115 Never = 0,
116 Always = 1,
117 ResNonZero = 2,
118 Equal = 3,
119 NotEqual = 4,
120 };
121
100 enum class ShaderProgram : u32 { 122 enum class ShaderProgram : u32 {
101 VertexA = 0, 123 VertexA = 0,
102 VertexB = 1, 124 VertexB = 1,
@@ -815,7 +837,18 @@ public:
815 BitField<4, 1, u32> alpha_to_one; 837 BitField<4, 1, u32> alpha_to_one;
816 } multisample_control; 838 } multisample_control;
817 839
818 INSERT_PADDING_WORDS(0x7); 840 INSERT_PADDING_WORDS(0x4);
841
842 struct {
843 u32 address_high;
844 u32 address_low;
845 ConditionMode mode;
846
847 GPUVAddr Address() const {
848 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
849 address_low);
850 }
851 } condition;
819 852
820 struct { 853 struct {
821 u32 tsc_address_high; 854 u32 tsc_address_high;
@@ -1124,23 +1157,77 @@ public:
1124 1157
1125 State state{}; 1158 State state{};
1126 1159
1127 struct DirtyFlags { 1160 struct DirtyRegs {
1128 std::bitset<8> color_buffer{0xFF}; 1161 static constexpr std::size_t NUM_REGS = 256;
1129 std::bitset<32> vertex_array{0xFFFFFFFF}; 1162 union {
1163 struct {
1164 bool null_dirty;
1165
1166 // Vertex Attributes
1167 bool vertex_attrib_format;
1168
1169 // Vertex Arrays
1170 std::array<bool, 32> vertex_array;
1171
1172 bool vertex_array_buffers;
1173
1174 // Vertex Instances
1175 std::array<bool, 32> vertex_instance;
1130 1176
1131 bool vertex_attrib_format = true; 1177 bool vertex_instances;
1132 bool zeta_buffer = true; 1178
1133 bool shaders = true; 1179 // Render Targets
1180 std::array<bool, 8> render_target;
1181 bool depth_buffer;
1182
1183 bool render_settings;
1184
1185 // Shaders
1186 bool shaders;
1187
1188 // Rasterizer State
1189 bool viewport;
1190 bool clip_coefficient;
1191 bool cull_mode;
1192 bool primitive_restart;
1193 bool depth_test;
1194 bool stencil_test;
1195 bool blend_state;
1196 bool scissor_test;
1197 bool transform_feedback;
1198 bool color_mask;
1199 bool polygon_offset;
1200
1201 // Complementary
1202 bool viewport_transform;
1203 bool screen_y_control;
1204
1205 bool memory_general;
1206 };
1207 std::array<bool, NUM_REGS> regs;
1208 };
1209
1210 void ResetVertexArrays() {
1211 vertex_array.fill(true);
1212 vertex_array_buffers = true;
1213 }
1214
1215 void ResetRenderTargets() {
1216 depth_buffer = true;
1217 render_target.fill(true);
1218 render_settings = true;
1219 }
1134 1220
1135 void OnMemoryWrite() { 1221 void OnMemoryWrite() {
1136 zeta_buffer = true;
1137 shaders = true; 1222 shaders = true;
1138 color_buffer.set(); 1223 memory_general = true;
1139 vertex_array.set(); 1224 ResetRenderTargets();
1225 ResetVertexArrays();
1140 } 1226 }
1141 };
1142 1227
1143 DirtyFlags dirty_flags; 1228 } dirty{};
1229
1230 std::array<u8, Regs::NUM_REGS> dirty_pointers{};
1144 1231
1145 /// Reads a register value located at the input method address 1232 /// Reads a register value located at the input method address
1146 u32 GetRegisterValue(u32 method) const; 1233 u32 GetRegisterValue(u32 method) const;
@@ -1169,6 +1256,10 @@ public:
1169 return macro_memory; 1256 return macro_memory;
1170 } 1257 }
1171 1258
1259 bool ShouldExecute() const {
1260 return execute_on;
1261 }
1262
1172private: 1263private:
1173 void InitializeRegisterDefaults(); 1264 void InitializeRegisterDefaults();
1174 1265
@@ -1192,14 +1283,27 @@ private:
1192 /// Interpreter for the macro codes uploaded to the GPU. 1283 /// Interpreter for the macro codes uploaded to the GPU.
1193 MacroInterpreter macro_interpreter; 1284 MacroInterpreter macro_interpreter;
1194 1285
1286 static constexpr u32 null_cb_data = 0xFFFFFFFF;
1287 struct {
1288 std::array<std::array<u32, 0x4000>, 16> buffer;
1289 u32 current{null_cb_data};
1290 u32 id{null_cb_data};
1291 u32 start_pos{};
1292 u32 counter{};
1293 } cb_data_state;
1294
1195 Upload::State upload_state; 1295 Upload::State upload_state;
1196 1296
1297 bool execute_on{true};
1298
1197 /// Retrieves information about a specific TIC entry from the TIC buffer. 1299 /// Retrieves information about a specific TIC entry from the TIC buffer.
1198 Texture::TICEntry GetTICEntry(u32 tic_index) const; 1300 Texture::TICEntry GetTICEntry(u32 tic_index) const;
1199 1301
1200 /// Retrieves information about a specific TSC entry from the TSC buffer. 1302 /// Retrieves information about a specific TSC entry from the TSC buffer.
1201 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; 1303 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
1202 1304
1305 void InitDirtySettings();
1306
1203 /** 1307 /**
1204 * Call a macro on this engine. 1308 * Call a macro on this engine.
1205 * @param method Method to call 1309 * @param method Method to call
@@ -1219,11 +1323,16 @@ private:
1219 /// Handles a write to the QUERY_GET register. 1323 /// Handles a write to the QUERY_GET register.
1220 void ProcessQueryGet(); 1324 void ProcessQueryGet();
1221 1325
1326 // Handles Conditional Rendering
1327 void ProcessQueryCondition();
1328
1222 /// Handles writes to syncing register. 1329 /// Handles writes to syncing register.
1223 void ProcessSyncPoint(); 1330 void ProcessSyncPoint();
1224 1331
1225 /// Handles a write to the CB_DATA[i] register. 1332 /// Handles a write to the CB_DATA[i] register.
1333 void StartCBData(u32 method);
1226 void ProcessCBData(u32 value); 1334 void ProcessCBData(u32 value);
1335 void FinishCBData();
1227 1336
1228 /// Handles a write to the CB_BIND register. 1337 /// Handles a write to the CB_BIND register.
1229 void ProcessCBBind(Regs::ShaderStage stage); 1338 void ProcessCBBind(Regs::ShaderStage stage);
@@ -1290,6 +1399,7 @@ ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
1290ASSERT_REG_POSITION(point_size, 0x546); 1399ASSERT_REG_POSITION(point_size, 0x546);
1291ASSERT_REG_POSITION(zeta_enable, 0x54E); 1400ASSERT_REG_POSITION(zeta_enable, 0x54E);
1292ASSERT_REG_POSITION(multisample_control, 0x54F); 1401ASSERT_REG_POSITION(multisample_control, 0x54F);
1402ASSERT_REG_POSITION(condition, 0x554);
1293ASSERT_REG_POSITION(tsc, 0x557); 1403ASSERT_REG_POSITION(tsc, 0x557);
1294ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); 1404ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
1295ASSERT_REG_POSITION(tic, 0x55D); 1405ASSERT_REG_POSITION(tic, 0x55D);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 758c154cb..a28c04473 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() {
58 } 58 }
59 59
60 // All copies here update the main memory, so mark all rasterizer states as invalid. 60 // All copies here update the main memory, so mark all rasterizer states as invalid.
61 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 61 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
62 62
63 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 63 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
64 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 64 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 79d469b88..8520a0143 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -931,8 +931,6 @@ union Instruction {
931 } csetp; 931 } csetp;
932 932
933 union { 933 union {
934 BitField<35, 4, PredCondition> cond;
935 BitField<49, 1, u64> h_and;
936 BitField<6, 1, u64> ftz; 934 BitField<6, 1, u64> ftz;
937 BitField<45, 2, PredOperation> op; 935 BitField<45, 2, PredOperation> op;
938 BitField<3, 3, u64> pred3; 936 BitField<3, 3, u64> pred3;
@@ -940,9 +938,21 @@ union Instruction {
940 BitField<43, 1, u64> negate_a; 938 BitField<43, 1, u64> negate_a;
941 BitField<44, 1, u64> abs_a; 939 BitField<44, 1, u64> abs_a;
942 BitField<47, 2, HalfType> type_a; 940 BitField<47, 2, HalfType> type_a;
943 BitField<31, 1, u64> negate_b; 941 union {
944 BitField<30, 1, u64> abs_b; 942 BitField<35, 4, PredCondition> cond;
945 BitField<28, 2, HalfType> type_b; 943 BitField<49, 1, u64> h_and;
944 BitField<31, 1, u64> negate_b;
945 BitField<30, 1, u64> abs_b;
946 BitField<28, 2, HalfType> type_b;
947 } reg;
948 union {
949 BitField<56, 1, u64> negate_b;
950 BitField<54, 1, u64> abs_b;
951 } cbuf;
952 union {
953 BitField<49, 4, PredCondition> cond;
954 BitField<53, 1, u64> h_and;
955 } cbuf_and_imm;
946 BitField<42, 1, u64> neg_pred; 956 BitField<42, 1, u64> neg_pred;
947 BitField<39, 3, u64> pred39; 957 BitField<39, 3, u64> pred39;
948 } hsetp2; 958 } hsetp2;
@@ -1548,7 +1558,9 @@ public:
1548 HFMA2_RC, 1558 HFMA2_RC,
1549 HFMA2_RR, 1559 HFMA2_RR,
1550 HFMA2_IMM_R, 1560 HFMA2_IMM_R,
1561 HSETP2_C,
1551 HSETP2_R, 1562 HSETP2_R,
1563 HSETP2_IMM,
1552 HSET2_R, 1564 HSET2_R,
1553 POPC_C, 1565 POPC_C,
1554 POPC_R, 1566 POPC_R,
@@ -1831,7 +1843,9 @@ private:
1831 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), 1843 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
1832 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), 1844 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
1833 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), 1845 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
1834 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), 1846 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
1847 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
1848 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
1835 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), 1849 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
1836 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), 1850 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
1837 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), 1851 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 6cb5fd4e1..21007d8b2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -50,6 +50,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const {
50 return *maxwell_3d; 50 return *maxwell_3d;
51} 51}
52 52
53Engines::KeplerCompute& GPU::KeplerCompute() {
54 return *kepler_compute;
55}
56
57const Engines::KeplerCompute& GPU::KeplerCompute() const {
58 return *kepler_compute;
59}
60
53MemoryManager& GPU::MemoryManager() { 61MemoryManager& GPU::MemoryManager() {
54 return *memory_manager; 62 return *memory_manager;
55} 63}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 5a8b1c74a..0055e5326 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -155,6 +155,12 @@ public:
155 /// Returns a const reference to the Maxwell3D GPU engine. 155 /// Returns a const reference to the Maxwell3D GPU engine.
156 const Engines::Maxwell3D& Maxwell3D() const; 156 const Engines::Maxwell3D& Maxwell3D() const;
157 157
158 /// Returns a reference to the KeplerCompute GPU engine.
159 Engines::KeplerCompute& KeplerCompute();
160
161 /// Returns a reference to the KeplerCompute GPU engine.
162 const Engines::KeplerCompute& KeplerCompute() const;
163
158 /// Returns a reference to the GPU memory manager. 164 /// Returns a reference to the GPU memory manager.
159 Tegra::MemoryManager& MemoryManager(); 165 Tegra::MemoryManager& MemoryManager();
160 166
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 2b7367568..9881df0d5 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -34,6 +34,9 @@ public:
34 /// Clear the current framebuffer 34 /// Clear the current framebuffer
35 virtual void Clear() = 0; 35 virtual void Clear() = 0;
36 36
37 /// Dispatches a compute shader invocation
38 virtual void DispatchCompute(GPUVAddr code_addr) = 0;
39
37 /// Notify rasterizer that all caches should be flushed to Switch memory 40 /// Notify rasterizer that all caches should be flushed to Switch memory
38 virtual void FlushAll() = 0; 41 virtual void FlushAll() = 0;
39 42
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0bb5c068c..c28ae795c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <bitset>
7#include <memory> 8#include <memory>
8#include <string> 9#include <string>
9#include <string_view> 10#include <string_view>
@@ -19,6 +20,7 @@
19#include "core/core.h" 20#include "core/core.h"
20#include "core/hle/kernel/process.h" 21#include "core/hle/kernel/process.h"
21#include "core/settings.h" 22#include "core/settings.h"
23#include "video_core/engines/kepler_compute.h"
22#include "video_core/engines/maxwell_3d.h" 24#include "video_core/engines/maxwell_3d.h"
23#include "video_core/memory_manager.h" 25#include "video_core/memory_manager.h"
24#include "video_core/renderer_opengl/gl_rasterizer.h" 26#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -105,6 +107,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
105 shader_program_manager = std::make_unique<GLShader::ProgramManager>(); 107 shader_program_manager = std::make_unique<GLShader::ProgramManager>();
106 state.draw.shader_program = 0; 108 state.draw.shader_program = 0;
107 state.Apply(); 109 state.Apply();
110 clear_framebuffer.Create();
108 111
109 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); 112 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
110 CheckExtensions(); 113 CheckExtensions();
@@ -124,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
124 auto& gpu = system.GPU().Maxwell3D(); 127 auto& gpu = system.GPU().Maxwell3D();
125 const auto& regs = gpu.regs; 128 const auto& regs = gpu.regs;
126 129
127 if (!gpu.dirty_flags.vertex_attrib_format) { 130 if (!gpu.dirty.vertex_attrib_format) {
128 return state.draw.vertex_array; 131 return state.draw.vertex_array;
129 } 132 }
130 gpu.dirty_flags.vertex_attrib_format = false; 133 gpu.dirty.vertex_attrib_format = false;
131 134
132 MICROPROFILE_SCOPE(OpenGL_VAO); 135 MICROPROFILE_SCOPE(OpenGL_VAO);
133 136
@@ -181,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
181 } 184 }
182 185
183 // Rebinding the VAO invalidates the vertex buffer bindings. 186 // Rebinding the VAO invalidates the vertex buffer bindings.
184 gpu.dirty_flags.vertex_array.set(); 187 gpu.dirty.ResetVertexArrays();
185 188
186 state.draw.vertex_array = vao_entry.handle; 189 state.draw.vertex_array = vao_entry.handle;
187 return vao_entry.handle; 190 return vao_entry.handle;
@@ -189,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
189 192
190void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { 193void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
191 auto& gpu = system.GPU().Maxwell3D(); 194 auto& gpu = system.GPU().Maxwell3D();
192 const auto& regs = gpu.regs; 195 if (!gpu.dirty.vertex_array_buffers)
193
194 if (gpu.dirty_flags.vertex_array.none())
195 return; 196 return;
197 gpu.dirty.vertex_array_buffers = false;
198
199 const auto& regs = gpu.regs;
196 200
197 MICROPROFILE_SCOPE(OpenGL_VB); 201 MICROPROFILE_SCOPE(OpenGL_VB);
198 202
199 // Upload all guest vertex arrays sequentially to our buffer 203 // Upload all guest vertex arrays sequentially to our buffer
200 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 204 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
201 if (!gpu.dirty_flags.vertex_array[index]) 205 if (!gpu.dirty.vertex_array[index])
202 continue; 206 continue;
207 gpu.dirty.vertex_array[index] = false;
208 gpu.dirty.vertex_instance[index] = false;
203 209
204 const auto& vertex_array = regs.vertex_array[index]; 210 const auto& vertex_array = regs.vertex_array[index];
205 if (!vertex_array.IsEnabled()) 211 if (!vertex_array.IsEnabled())
@@ -224,8 +230,32 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
224 glVertexArrayBindingDivisor(vao, index, 0); 230 glVertexArrayBindingDivisor(vao, index, 0);
225 } 231 }
226 } 232 }
233}
234
235void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
236 auto& gpu = system.GPU().Maxwell3D();
237
238 if (!gpu.dirty.vertex_instances)
239 return;
240 gpu.dirty.vertex_instances = false;
241
242 const auto& regs = gpu.regs;
243 // Upload all guest vertex arrays sequentially to our buffer
244 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
245 if (!gpu.dirty.vertex_instance[index])
246 continue;
247
248 gpu.dirty.vertex_instance[index] = false;
227 249
228 gpu.dirty_flags.vertex_array.reset(); 250 if (regs.instanced_arrays.IsInstancingEnabled(index) &&
251 regs.vertex_array[index].divisor != 0) {
252 // Enable vertex buffer instancing with the specified divisor.
253 glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
254 } else {
255 // Disable the vertex buffer instancing.
256 glVertexArrayBindingDivisor(vao, index, 0);
257 }
258 }
229} 259}
230 260
231GLintptr RasterizerOpenGL::SetupIndexBuffer() { 261GLintptr RasterizerOpenGL::SetupIndexBuffer() {
@@ -298,9 +328,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
298 328
299 Shader shader{shader_cache.GetStageProgram(program)}; 329 Shader shader{shader_cache.GetStageProgram(program)};
300 330
301 const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)}; 331 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
302 SetupDrawConstBuffers(stage_enum, shader); 332 SetupDrawConstBuffers(stage_enum, shader);
303 SetupGlobalRegions(stage_enum, shader); 333 SetupDrawGlobalMemory(stage_enum, shader);
304 const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; 334 const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
305 335
306 const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; 336 const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
@@ -341,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
341 371
342 SyncClipEnabled(clip_distances); 372 SyncClipEnabled(clip_distances);
343 373
344 gpu.dirty_flags.shaders = false; 374 gpu.dirty.shaders = false;
345} 375}
346 376
347std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 377std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -424,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
424 454
425 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, 455 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
426 single_color_target}; 456 single_color_target};
427 if (fb_config_state == current_framebuffer_config_state && 457 if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
428 gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
429 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or 458 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
430 // single color targets). This is done because the guest registers may not change but the 459 // single color targets). This is done because the guest registers may not change but the
431 // host framebuffer may contain different attachments 460 // host framebuffer may contain different attachments
432 return current_depth_stencil_usage; 461 return current_depth_stencil_usage;
433 } 462 }
463 gpu.dirty.render_settings = false;
434 current_framebuffer_config_state = fb_config_state; 464 current_framebuffer_config_state = fb_config_state;
435 465
436 texture_cache.GuardRenderTargets(true); 466 texture_cache.GuardRenderTargets(true);
@@ -519,13 +549,71 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
519 return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; 549 return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
520} 550}
521 551
552void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
553 bool using_depth_fb, bool using_stencil_fb) {
554 auto& gpu = system.GPU().Maxwell3D();
555 const auto& regs = gpu.regs;
556
557 texture_cache.GuardRenderTargets(true);
558 View color_surface{};
559 if (using_color_fb) {
560 color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
561 }
562 View depth_surface{};
563 if (using_depth_fb || using_stencil_fb) {
564 depth_surface = texture_cache.GetDepthBufferSurface(false);
565 }
566 texture_cache.GuardRenderTargets(false);
567
568 current_state.draw.draw_framebuffer = clear_framebuffer.handle;
569 current_state.ApplyFramebufferState();
570
571 if (color_surface) {
572 color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
573 } else {
574 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
575 }
576
577 if (depth_surface) {
578 const auto& params = depth_surface->GetSurfaceParams();
579 switch (params.type) {
580 case VideoCore::Surface::SurfaceType::Depth: {
581 depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
582 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
583 break;
584 }
585 case VideoCore::Surface::SurfaceType::DepthStencil: {
586 depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
587 break;
588 }
589 default: { UNIMPLEMENTED(); }
590 }
591 } else {
592 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
593 0);
594 }
595}
596
522void RasterizerOpenGL::Clear() { 597void RasterizerOpenGL::Clear() {
523 const auto& regs = system.GPU().Maxwell3D().regs; 598 const auto& maxwell3d = system.GPU().Maxwell3D();
599
600 if (!maxwell3d.ShouldExecute()) {
601 return;
602 }
603
604 const auto& regs = maxwell3d.regs;
524 bool use_color{}; 605 bool use_color{};
525 bool use_depth{}; 606 bool use_depth{};
526 bool use_stencil{}; 607 bool use_stencil{};
527 608
528 OpenGLState clear_state; 609 OpenGLState prev_state{OpenGLState::GetCurState()};
610 SCOPE_EXIT({
611 prev_state.AllDirty();
612 prev_state.Apply();
613 });
614
615 OpenGLState clear_state{OpenGLState::GetCurState()};
616 clear_state.SetDefaultViewports();
529 if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || 617 if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
530 regs.clear_buffers.A) { 618 regs.clear_buffers.A) {
531 use_color = true; 619 use_color = true;
@@ -545,6 +633,7 @@ void RasterizerOpenGL::Clear() {
545 // true. 633 // true.
546 clear_state.depth.test_enabled = true; 634 clear_state.depth.test_enabled = true;
547 clear_state.depth.test_func = GL_ALWAYS; 635 clear_state.depth.test_func = GL_ALWAYS;
636 clear_state.depth.write_mask = GL_TRUE;
548 } 637 }
549 if (regs.clear_buffers.S) { 638 if (regs.clear_buffers.S) {
550 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); 639 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
@@ -581,8 +670,9 @@ void RasterizerOpenGL::Clear() {
581 return; 670 return;
582 } 671 }
583 672
584 const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( 673 ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
585 clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); 674
675 SyncViewport(clear_state);
586 if (regs.clear_flags.scissor) { 676 if (regs.clear_flags.scissor) {
587 SyncScissorTest(clear_state); 677 SyncScissorTest(clear_state);
588 } 678 }
@@ -591,21 +681,18 @@ void RasterizerOpenGL::Clear() {
591 clear_state.EmulateViewportWithScissor(); 681 clear_state.EmulateViewportWithScissor();
592 } 682 }
593 683
594 clear_state.ApplyColorMask(); 684 clear_state.AllDirty();
595 clear_state.ApplyDepth(); 685 clear_state.Apply();
596 clear_state.ApplyStencilTest();
597 clear_state.ApplyViewport();
598 clear_state.ApplyFramebufferState();
599 686
600 if (use_color) { 687 if (use_color) {
601 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); 688 glClearBufferfv(GL_COLOR, 0, regs.clear_color);
602 } 689 }
603 690
604 if (clear_depth && clear_stencil) { 691 if (use_depth && use_stencil) {
605 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); 692 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
606 } else if (clear_depth) { 693 } else if (use_depth) {
607 glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth); 694 glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
608 } else if (clear_stencil) { 695 } else if (use_stencil) {
609 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); 696 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
610 } 697 }
611} 698}
@@ -616,6 +703,11 @@ void RasterizerOpenGL::DrawArrays() {
616 703
617 MICROPROFILE_SCOPE(OpenGL_Drawing); 704 MICROPROFILE_SCOPE(OpenGL_Drawing);
618 auto& gpu = system.GPU().Maxwell3D(); 705 auto& gpu = system.GPU().Maxwell3D();
706
707 if (!gpu.ShouldExecute()) {
708 return;
709 }
710
619 const auto& regs = gpu.regs; 711 const auto& regs = gpu.regs;
620 712
621 SyncColorMask(); 713 SyncColorMask();
@@ -661,6 +753,7 @@ void RasterizerOpenGL::DrawArrays() {
661 753
662 // Upload vertex and index data. 754 // Upload vertex and index data.
663 SetupVertexBuffer(vao); 755 SetupVertexBuffer(vao);
756 SetupVertexInstances(vao);
664 const GLintptr index_buffer_offset = SetupIndexBuffer(); 757 const GLintptr index_buffer_offset = SetupIndexBuffer();
665 758
666 // Setup draw parameters. It will automatically choose what glDraw* method to use. 759 // Setup draw parameters. It will automatically choose what glDraw* method to use.
@@ -687,7 +780,7 @@ void RasterizerOpenGL::DrawArrays() {
687 780
688 if (invalidate) { 781 if (invalidate) {
689 // As all cached buffers are invalidated, we need to recheck their state. 782 // As all cached buffers are invalidated, we need to recheck their state.
690 gpu.dirty_flags.vertex_array.set(); 783 gpu.dirty.ResetVertexArrays();
691 } 784 }
692 785
693 shader_program_manager->ApplyTo(state); 786 shader_program_manager->ApplyTo(state);
@@ -700,6 +793,46 @@ void RasterizerOpenGL::DrawArrays() {
700 params.DispatchDraw(); 793 params.DispatchDraw();
701 794
702 accelerate_draw = AccelDraw::Disabled; 795 accelerate_draw = AccelDraw::Disabled;
796 gpu.dirty.memory_general = false;
797}
798
799void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
800 if (!GLAD_GL_ARB_compute_variable_group_size) {
801 LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
802 "lack of GL_ARB_compute_variable_group_size");
803 return;
804 }
805
806 auto kernel = shader_cache.GetComputeKernel(code_addr);
807 const auto [program, next_bindings] = kernel->GetProgramHandle({});
808 state.draw.shader_program = program;
809 state.draw.program_pipeline = 0;
810
811 const std::size_t buffer_size =
812 Tegra::Engines::KeplerCompute::NumConstBuffers *
813 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
814 buffer_cache.Map(buffer_size);
815
816 bind_ubo_pushbuffer.Setup(0);
817 bind_ssbo_pushbuffer.Setup(0);
818
819 SetupComputeConstBuffers(kernel);
820 SetupComputeGlobalMemory(kernel);
821
822 // TODO(Rodrigo): Bind images and samplers
823
824 buffer_cache.Unmap();
825
826 bind_ubo_pushbuffer.Bind();
827 bind_ssbo_pushbuffer.Bind();
828
829 state.ApplyShaderProgram();
830 state.ApplyProgramPipeline();
831
832 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
833 glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
834 launch_desc.grid_dim_z, launch_desc.block_dim_x,
835 launch_desc.block_dim_y, launch_desc.block_dim_z);
703} 836}
704 837
705void RasterizerOpenGL::FlushAll() {} 838void RasterizerOpenGL::FlushAll() {}
@@ -775,12 +908,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
775void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 908void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
776 const Shader& shader) { 909 const Shader& shader) {
777 MICROPROFILE_SCOPE(OpenGL_UBO); 910 MICROPROFILE_SCOPE(OpenGL_UBO);
778 const auto stage_index = static_cast<std::size_t>(stage); 911 const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
779 const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; 912 const auto& shader_stage = stages[static_cast<std::size_t>(stage)];
780
781 // Upload only the enabled buffers from the 16 constbuffers of each shader stage
782 for (const auto& entry : shader->GetShaderEntries().const_buffers) { 913 for (const auto& entry : shader->GetShaderEntries().const_buffers) {
783 SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); 914 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
915 SetupConstBuffer(buffer, entry);
916 }
917}
918
919void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
920 MICROPROFILE_SCOPE(OpenGL_UBO);
921 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
922 for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
923 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
924 const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value();
925 Tegra::Engines::ConstBufferInfo buffer;
926 buffer.address = config.Address();
927 buffer.size = config.size;
928 buffer.enabled = mask[entry.GetIndex()];
929 SetupConstBuffer(buffer, entry);
784 } 930 }
785} 931}
786 932
@@ -801,24 +947,39 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
801 bind_ubo_pushbuffer.Push(cbuf, offset, size); 947 bind_ubo_pushbuffer.Push(cbuf, offset, size);
802} 948}
803 949
804void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 950void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
805 const Shader& shader) { 951 const Shader& shader) {
806 auto& gpu{system.GPU()}; 952 auto& gpu{system.GPU()};
807 auto& memory_manager{gpu.MemoryManager()}; 953 auto& memory_manager{gpu.MemoryManager()};
808 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; 954 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
809 const auto alignment{device.GetShaderStorageBufferAlignment()};
810
811 for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { 955 for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
812 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; 956 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
813 const auto actual_addr{memory_manager.Read<u64>(addr)}; 957 const auto gpu_addr{memory_manager.Read<u64>(addr)};
814 const auto size{memory_manager.Read<u32>(addr + 8)}; 958 const auto size{memory_manager.Read<u32>(addr + 8)};
959 SetupGlobalMemory(entry, gpu_addr, size);
960 }
961}
815 962
816 const auto [ssbo, buffer_offset] = 963void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
817 buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten()); 964 auto& gpu{system.GPU()};
818 bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); 965 auto& memory_manager{gpu.MemoryManager()};
966 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
967 for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
968 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
969 const auto gpu_addr{memory_manager.Read<u64>(addr)};
970 const auto size{memory_manager.Read<u32>(addr + 8)};
971 SetupGlobalMemory(entry, gpu_addr, size);
819 } 972 }
820} 973}
821 974
975void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry,
976 GPUVAddr gpu_addr, std::size_t size) {
977 const auto alignment{device.GetShaderStorageBufferAlignment()};
978 const auto [ssbo, buffer_offset] =
979 buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten());
980 bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
981}
982
822TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, 983TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
823 BaseBindings base_bindings) { 984 BaseBindings base_bindings) {
824 MICROPROFILE_SCOPE(OpenGL_Texture); 985 MICROPROFILE_SCOPE(OpenGL_Texture);
@@ -907,10 +1068,11 @@ void RasterizerOpenGL::SyncClipCoef() {
907} 1068}
908 1069
909void RasterizerOpenGL::SyncCullMode() { 1070void RasterizerOpenGL::SyncCullMode() {
910 const auto& regs = system.GPU().Maxwell3D().regs; 1071 auto& maxwell3d = system.GPU().Maxwell3D();
911 1072
912 state.cull.enabled = regs.cull.enabled != 0; 1073 const auto& regs = maxwell3d.regs;
913 1074
1075 state.cull.enabled = regs.cull.enabled != 0;
914 if (state.cull.enabled) { 1076 if (state.cull.enabled) {
915 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); 1077 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
916 state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); 1078 state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
@@ -943,16 +1105,21 @@ void RasterizerOpenGL::SyncDepthTestState() {
943 state.depth.test_enabled = regs.depth_test_enable != 0; 1105 state.depth.test_enabled = regs.depth_test_enable != 0;
944 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; 1106 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
945 1107
946 if (!state.depth.test_enabled) 1108 if (!state.depth.test_enabled) {
947 return; 1109 return;
1110 }
948 1111
949 state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); 1112 state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
950} 1113}
951 1114
952void RasterizerOpenGL::SyncStencilTestState() { 1115void RasterizerOpenGL::SyncStencilTestState() {
953 const auto& regs = system.GPU().Maxwell3D().regs; 1116 auto& maxwell3d = system.GPU().Maxwell3D();
954 state.stencil.test_enabled = regs.stencil_enable != 0; 1117 if (!maxwell3d.dirty.stencil_test) {
1118 return;
1119 }
1120 const auto& regs = maxwell3d.regs;
955 1121
1122 state.stencil.test_enabled = regs.stencil_enable != 0;
956 if (!regs.stencil_enable) { 1123 if (!regs.stencil_enable) {
957 return; 1124 return;
958 } 1125 }
@@ -981,10 +1148,17 @@ void RasterizerOpenGL::SyncStencilTestState() {
981 state.stencil.back.action_depth_fail = GL_KEEP; 1148 state.stencil.back.action_depth_fail = GL_KEEP;
982 state.stencil.back.action_depth_pass = GL_KEEP; 1149 state.stencil.back.action_depth_pass = GL_KEEP;
983 } 1150 }
1151 state.MarkDirtyStencilState();
1152 maxwell3d.dirty.stencil_test = false;
984} 1153}
985 1154
986void RasterizerOpenGL::SyncColorMask() { 1155void RasterizerOpenGL::SyncColorMask() {
987 const auto& regs = system.GPU().Maxwell3D().regs; 1156 auto& maxwell3d = system.GPU().Maxwell3D();
1157 if (!maxwell3d.dirty.color_mask) {
1158 return;
1159 }
1160 const auto& regs = maxwell3d.regs;
1161
988 const std::size_t count = 1162 const std::size_t count =
989 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; 1163 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
990 for (std::size_t i = 0; i < count; i++) { 1164 for (std::size_t i = 0; i < count; i++) {
@@ -995,6 +1169,9 @@ void RasterizerOpenGL::SyncColorMask() {
995 dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; 1169 dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
996 dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; 1170 dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
997 } 1171 }
1172
1173 state.MarkDirtyColorMask();
1174 maxwell3d.dirty.color_mask = false;
998} 1175}
999 1176
1000void RasterizerOpenGL::SyncMultiSampleState() { 1177void RasterizerOpenGL::SyncMultiSampleState() {
@@ -1009,7 +1186,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
1009} 1186}
1010 1187
1011void RasterizerOpenGL::SyncBlendState() { 1188void RasterizerOpenGL::SyncBlendState() {
1012 const auto& regs = system.GPU().Maxwell3D().regs; 1189 auto& maxwell3d = system.GPU().Maxwell3D();
1190 if (!maxwell3d.dirty.blend_state) {
1191 return;
1192 }
1193 const auto& regs = maxwell3d.regs;
1013 1194
1014 state.blend_color.red = regs.blend_color.r; 1195 state.blend_color.red = regs.blend_color.r;
1015 state.blend_color.green = regs.blend_color.g; 1196 state.blend_color.green = regs.blend_color.g;
@@ -1032,6 +1213,8 @@ void RasterizerOpenGL::SyncBlendState() {
1032 for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 1213 for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
1033 state.blend[i].enabled = false; 1214 state.blend[i].enabled = false;
1034 } 1215 }
1216 maxwell3d.dirty.blend_state = false;
1217 state.MarkDirtyBlendState();
1035 return; 1218 return;
1036 } 1219 }
1037 1220
@@ -1048,6 +1231,9 @@ void RasterizerOpenGL::SyncBlendState() {
1048 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); 1231 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
1049 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); 1232 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
1050 } 1233 }
1234
1235 state.MarkDirtyBlendState();
1236 maxwell3d.dirty.blend_state = false;
1051} 1237}
1052 1238
1053void RasterizerOpenGL::SyncLogicOpState() { 1239void RasterizerOpenGL::SyncLogicOpState() {
@@ -1099,13 +1285,21 @@ void RasterizerOpenGL::SyncPointState() {
1099} 1285}
1100 1286
1101void RasterizerOpenGL::SyncPolygonOffset() { 1287void RasterizerOpenGL::SyncPolygonOffset() {
1102 const auto& regs = system.GPU().Maxwell3D().regs; 1288 auto& maxwell3d = system.GPU().Maxwell3D();
1289 if (!maxwell3d.dirty.polygon_offset) {
1290 return;
1291 }
1292 const auto& regs = maxwell3d.regs;
1293
1103 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; 1294 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1104 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; 1295 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1105 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; 1296 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
1106 state.polygon_offset.units = regs.polygon_offset_units; 1297 state.polygon_offset.units = regs.polygon_offset_units;
1107 state.polygon_offset.factor = regs.polygon_offset_factor; 1298 state.polygon_offset.factor = regs.polygon_offset_factor;
1108 state.polygon_offset.clamp = regs.polygon_offset_clamp; 1299 state.polygon_offset.clamp = regs.polygon_offset_clamp;
1300
1301 state.MarkDirtyPolygonOffset();
1302 maxwell3d.dirty.polygon_offset = false;
1109} 1303}
1110 1304
1111void RasterizerOpenGL::SyncAlphaTest() { 1305void RasterizerOpenGL::SyncAlphaTest() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 40b571d58..8b123c48d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -58,6 +58,7 @@ public:
58 58
59 void DrawArrays() override; 59 void DrawArrays() override;
60 void Clear() override; 60 void Clear() override;
61 void DispatchCompute(GPUVAddr code_addr) override;
61 void FlushAll() override; 62 void FlushAll() override;
62 void FlushRegion(CacheAddr addr, u64 size) override; 63 void FlushRegion(CacheAddr addr, u64 size) override;
63 void InvalidateRegion(CacheAddr addr, u64 size) override; 64 void InvalidateRegion(CacheAddr addr, u64 size) override;
@@ -108,17 +109,30 @@ private:
108 OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true, 109 OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
109 bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); 110 bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
110 111
112 void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
113 bool using_depth_fb, bool using_stencil_fb);
114
111 /// Configures the current constbuffers to use for the draw command. 115 /// Configures the current constbuffers to use for the draw command.
112 void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 116 void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
113 const Shader& shader); 117 const Shader& shader);
114 118
119 /// Configures the current constbuffers to use for the kernel invocation.
120 void SetupComputeConstBuffers(const Shader& kernel);
121
115 /// Configures a constant buffer. 122 /// Configures a constant buffer.
116 void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, 123 void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
117 const GLShader::ConstBufferEntry& entry); 124 const GLShader::ConstBufferEntry& entry);
118 125
119 /// Configures the current global memory entries to use for the draw command. 126 /// Configures the current global memory entries to use for the draw command.
120 void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 127 void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
121 const Shader& shader); 128 const Shader& shader);
129
130 /// Configures the current global memory entries to use for the kernel invocation.
131 void SetupComputeGlobalMemory(const Shader& kernel);
132
133 /// Configures a constant buffer.
134 void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
135 std::size_t size);
122 136
123 /// Configures the current textures to use for the draw command. Returns shaders texture buffer 137 /// Configures the current textures to use for the draw command. Returns shaders texture buffer
124 /// usage. 138 /// usage.
@@ -216,6 +230,7 @@ private:
216 GLuint SetupVertexFormat(); 230 GLuint SetupVertexFormat();
217 231
218 void SetupVertexBuffer(GLuint vao); 232 void SetupVertexBuffer(GLuint vao);
233 void SetupVertexInstances(GLuint vao);
219 234
220 GLintptr SetupIndexBuffer(); 235 GLintptr SetupIndexBuffer();
221 236
@@ -226,6 +241,8 @@ private:
226 enum class AccelDraw { Disabled, Arrays, Indexed }; 241 enum class AccelDraw { Disabled, Arrays, Indexed };
227 AccelDraw accelerate_draw = AccelDraw::Disabled; 242 AccelDraw accelerate_draw = AccelDraw::Disabled;
228 243
244 OGLFramebuffer clear_framebuffer;
245
229 using CachedPageMap = boost::icl::interval_map<u64, int>; 246 using CachedPageMap = boost::icl::interval_map<u64, int>;
230 CachedPageMap cached_pages; 247 CachedPageMap cached_pages;
231}; 248};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 32dd9eae7..1c90facc3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -23,13 +23,13 @@ namespace OpenGL {
23 23
24using VideoCommon::Shader::ProgramCode; 24using VideoCommon::Shader::ProgramCode;
25 25
26// One UBO is always reserved for emulation values 26// One UBO is always reserved for emulation values on staged shaders
27constexpr u32 RESERVED_UBOS = 1; 27constexpr u32 STAGE_RESERVED_UBOS = 1;
28 28
29struct UnspecializedShader { 29struct UnspecializedShader {
30 std::string code; 30 std::string code;
31 GLShader::ShaderEntries entries; 31 GLShader::ShaderEntries entries;
32 Maxwell::ShaderProgram program_type; 32 ProgramType program_type;
33}; 33};
34 34
35namespace { 35namespace {
@@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
55} 55}
56 56
57/// Gets the shader type from a Maxwell program type 57/// Gets the shader type from a Maxwell program type
58constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { 58constexpr GLenum GetShaderType(ProgramType program_type) {
59 switch (program_type) { 59 switch (program_type) {
60 case Maxwell::ShaderProgram::VertexA: 60 case ProgramType::VertexA:
61 case Maxwell::ShaderProgram::VertexB: 61 case ProgramType::VertexB:
62 return GL_VERTEX_SHADER; 62 return GL_VERTEX_SHADER;
63 case Maxwell::ShaderProgram::Geometry: 63 case ProgramType::Geometry:
64 return GL_GEOMETRY_SHADER; 64 return GL_GEOMETRY_SHADER;
65 case Maxwell::ShaderProgram::Fragment: 65 case ProgramType::Fragment:
66 return GL_FRAGMENT_SHADER; 66 return GL_FRAGMENT_SHADER;
67 case ProgramType::Compute:
68 return GL_COMPUTE_SHADER;
67 default: 69 default:
68 return GL_NONE; 70 return GL_NONE;
69 } 71 }
@@ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen
100 } 102 }
101} 103}
102 104
105ProgramType GetProgramType(Maxwell::ShaderProgram program) {
106 switch (program) {
107 case Maxwell::ShaderProgram::VertexA:
108 return ProgramType::VertexA;
109 case Maxwell::ShaderProgram::VertexB:
110 return ProgramType::VertexB;
111 case Maxwell::ShaderProgram::TesselationControl:
112 return ProgramType::TessellationControl;
113 case Maxwell::ShaderProgram::TesselationEval:
114 return ProgramType::TessellationEval;
115 case Maxwell::ShaderProgram::Geometry:
116 return ProgramType::Geometry;
117 case Maxwell::ShaderProgram::Fragment:
118 return ProgramType::Fragment;
119 }
120 UNREACHABLE();
121 return {};
122}
123
103/// Calculates the size of a program stream 124/// Calculates the size of a program stream
104std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { 125std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
105 constexpr std::size_t start_offset = 10; 126 constexpr std::size_t start_offset = 10;
@@ -128,13 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
128} 149}
129 150
130/// Hashes one (or two) program streams 151/// Hashes one (or two) program streams
131u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, 152u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
132 const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { 153 const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
133 if (size_a == 0) { 154 if (size_a == 0) {
134 size_a = CalculateProgramSize(code); 155 size_a = CalculateProgramSize(code);
135 } 156 }
136 u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); 157 u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
137 if (program_type != Maxwell::ShaderProgram::VertexA) { 158 if (program_type != ProgramType::VertexA) {
138 return unique_identifier; 159 return unique_identifier;
139 } 160 }
140 // VertexA programs include two programs 161 // VertexA programs include two programs
@@ -152,12 +173,12 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
152} 173}
153 174
154/// Creates an unspecialized program from code streams 175/// Creates an unspecialized program from code streams
155GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, 176GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
156 ProgramCode program_code, ProgramCode program_code_b) { 177 ProgramCode program_code, ProgramCode program_code_b) {
157 GLShader::ShaderSetup setup(program_code); 178 GLShader::ShaderSetup setup(program_code);
158 setup.program.size_a = CalculateProgramSize(program_code); 179 setup.program.size_a = CalculateProgramSize(program_code);
159 setup.program.size_b = 0; 180 setup.program.size_b = 0;
160 if (program_type == Maxwell::ShaderProgram::VertexA) { 181 if (program_type == ProgramType::VertexA) {
161 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. 182 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
162 // Conventional HW does not support this, so we combine VertexA and VertexB into one 183 // Conventional HW does not support this, so we combine VertexA and VertexB into one
163 // stage here. 184 // stage here.
@@ -168,22 +189,23 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
168 program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); 189 program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
169 190
170 switch (program_type) { 191 switch (program_type) {
171 case Maxwell::ShaderProgram::VertexA: 192 case ProgramType::VertexA:
172 case Maxwell::ShaderProgram::VertexB: 193 case ProgramType::VertexB:
173 return GLShader::GenerateVertexShader(device, setup); 194 return GLShader::GenerateVertexShader(device, setup);
174 case Maxwell::ShaderProgram::Geometry: 195 case ProgramType::Geometry:
175 return GLShader::GenerateGeometryShader(device, setup); 196 return GLShader::GenerateGeometryShader(device, setup);
176 case Maxwell::ShaderProgram::Fragment: 197 case ProgramType::Fragment:
177 return GLShader::GenerateFragmentShader(device, setup); 198 return GLShader::GenerateFragmentShader(device, setup);
199 case ProgramType::Compute:
200 return GLShader::GenerateComputeShader(device, setup);
178 default: 201 default:
179 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); 202 UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
180 UNREACHABLE();
181 return {}; 203 return {};
182 } 204 }
183} 205}
184 206
185CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, 207CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
186 Maxwell::ShaderProgram program_type, const ProgramVariant& variant, 208 ProgramType program_type, const ProgramVariant& variant,
187 bool hint_retrievable = false) { 209 bool hint_retrievable = false) {
188 auto base_bindings{variant.base_bindings}; 210 auto base_bindings{variant.base_bindings};
189 const auto primitive_mode{variant.primitive_mode}; 211 const auto primitive_mode{variant.primitive_mode};
@@ -194,7 +216,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
194 if (entries.shader_viewport_layer_array) { 216 if (entries.shader_viewport_layer_array) {
195 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; 217 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
196 } 218 }
197 source += fmt::format("\n#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); 219 if (program_type == ProgramType::Compute) {
220 source += "#extension GL_ARB_compute_variable_group_size : require\n";
221 }
222 source += '\n';
223
224 if (program_type != ProgramType::Compute) {
225 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
226 }
198 227
199 for (const auto& cbuf : entries.const_buffers) { 228 for (const auto& cbuf : entries.const_buffers) {
200 source += 229 source +=
@@ -221,13 +250,16 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
221 source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); 250 source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);
222 } 251 }
223 252
224 if (program_type == Maxwell::ShaderProgram::Geometry) { 253 if (program_type == ProgramType::Geometry) {
225 const auto [glsl_topology, debug_name, max_vertices] = 254 const auto [glsl_topology, debug_name, max_vertices] =
226 GetPrimitiveDescription(primitive_mode); 255 GetPrimitiveDescription(primitive_mode);
227 256
228 source += "layout (" + std::string(glsl_topology) + ") in;\n"; 257 source += "layout (" + std::string(glsl_topology) + ") in;\n";
229 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; 258 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
230 } 259 }
260 if (program_type == ProgramType::Compute) {
261 source += "layout (local_size_variable) in;\n";
262 }
231 263
232 source += code; 264 source += code;
233 265
@@ -255,7 +287,7 @@ std::set<GLenum> GetSupportedFormats() {
255 287
256} // Anonymous namespace 288} // Anonymous namespace
257 289
258CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, 290CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
259 GLShader::ProgramResult result) 291 GLShader::ProgramResult result)
260 : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, 292 : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr},
261 unique_identifier{params.unique_identifier}, program_type{program_type}, 293 unique_identifier{params.unique_identifier}, program_type{program_type},
@@ -268,29 +300,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
268 ProgramCode&& program_code_b) { 300 ProgramCode&& program_code_b) {
269 const auto code_size{CalculateProgramSize(program_code)}; 301 const auto code_size{CalculateProgramSize(program_code)};
270 const auto code_size_b{CalculateProgramSize(program_code_b)}; 302 const auto code_size_b{CalculateProgramSize(program_code_b)};
271 auto result{CreateProgram(params.device, program_type, program_code, program_code_b)}; 303 auto result{
304 CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
272 if (result.first.empty()) { 305 if (result.first.empty()) {
273 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now 306 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
274 return {}; 307 return {};
275 } 308 }
276 309
277 params.disk_cache.SaveRaw(ShaderDiskCacheRaw( 310 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
278 params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)), 311 params.unique_identifier, GetProgramType(program_type),
279 static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code), 312 static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)),
280 std::move(program_code_b))); 313 std::move(program_code), std::move(program_code_b)));
281 314
282 return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); 315 return std::shared_ptr<CachedShader>(
316 new CachedShader(params, GetProgramType(program_type), std::move(result)));
283} 317}
284 318
285Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, 319Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
286 Maxwell::ShaderProgram program_type, 320 Maxwell::ShaderProgram program_type,
287 GLShader::ProgramResult result) { 321 GLShader::ProgramResult result) {
288 return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); 322 return std::shared_ptr<CachedShader>(
323 new CachedShader(params, GetProgramType(program_type), std::move(result)));
324}
325
326Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
327 auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
328
329 const auto code_size{CalculateProgramSize(code)};
330 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
331 static_cast<u32>(code_size / sizeof(u64)), 0,
332 std::move(code), {}));
333
334 return std::shared_ptr<CachedShader>(
335 new CachedShader(params, ProgramType::Compute, std::move(result)));
336}
337
338Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
339 GLShader::ProgramResult result) {
340 return std::shared_ptr<CachedShader>(
341 new CachedShader(params, ProgramType::Compute, std::move(result)));
289} 342}
290 343
291std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { 344std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
292 GLuint handle{}; 345 GLuint handle{};
293 if (program_type == Maxwell::ShaderProgram::Geometry) { 346 if (program_type == ProgramType::Geometry) {
294 handle = GetGeometryShader(variant); 347 handle = GetGeometryShader(variant);
295 } else { 348 } else {
296 const auto [entry, is_cache_miss] = programs.try_emplace(variant); 349 const auto [entry, is_cache_miss] = programs.try_emplace(variant);
@@ -308,8 +361,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
308 handle = program->handle; 361 handle = program->handle;
309 } 362 }
310 363
311 auto base_bindings{variant.base_bindings}; 364 auto base_bindings = variant.base_bindings;
312 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; 365 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
366 if (program_type != ProgramType::Compute) {
367 base_bindings.cbuf += STAGE_RESERVED_UBOS;
368 }
313 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); 369 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
314 base_bindings.sampler += static_cast<u32>(entries.samplers.size()); 370 base_bindings.sampler += static_cast<u32>(entries.samplers.size());
315 371
@@ -572,7 +628,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
572} 628}
573 629
574Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 630Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
575 if (!system.GPU().Maxwell3D().dirty_flags.shaders) { 631 if (!system.GPU().Maxwell3D().dirty.shaders) {
576 return last_shaders[static_cast<std::size_t>(program)]; 632 return last_shaders[static_cast<std::size_t>(program)];
577 } 633 }
578 634
@@ -589,13 +645,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
589 // No shader found - create a new one 645 // No shader found - create a new one
590 ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; 646 ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
591 ProgramCode program_code_b; 647 ProgramCode program_code_b;
592 if (program == Maxwell::ShaderProgram::VertexA) { 648 const bool is_program_a{program == Maxwell::ShaderProgram::VertexA};
649 if (is_program_a) {
593 const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; 650 const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
594 program_code_b = GetShaderCode(memory_manager, program_addr_b, 651 program_code_b = GetShaderCode(memory_manager, program_addr_b,
595 memory_manager.GetPointer(program_addr_b)); 652 memory_manager.GetPointer(program_addr_b));
596 } 653 }
597 654
598 const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); 655 const auto unique_identifier =
656 GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
599 const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; 657 const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
600 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, 658 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
601 host_ptr, unique_identifier}; 659 host_ptr, unique_identifier};
@@ -612,4 +670,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
612 return last_shaders[static_cast<std::size_t>(program)] = shader; 670 return last_shaders[static_cast<std::size_t>(program)] = shader;
613} 671}
614 672
673Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
674 auto& memory_manager{system.GPU().MemoryManager()};
675 const auto host_ptr{memory_manager.GetPointer(code_addr)};
676 auto kernel = TryGet(host_ptr);
677 if (kernel) {
678 return kernel;
679 }
680
681 // No kernel found - create a new one
682 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
683 const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
684 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
685 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
686 host_ptr, unique_identifier};
687
688 const auto found = precompiled_shaders.find(unique_identifier);
689 if (found == precompiled_shaders.end()) {
690 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
691 } else {
692 kernel = CachedShader::CreateKernelFromCache(params, found->second);
693 }
694
695 Register(kernel);
696 return kernel;
697}
698
615} // namespace OpenGL 699} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index bbb53cdf4..a3106a0ff 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -61,6 +61,11 @@ public:
61 Maxwell::ShaderProgram program_type, 61 Maxwell::ShaderProgram program_type,
62 GLShader::ProgramResult result); 62 GLShader::ProgramResult result);
63 63
64 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code);
65
66 static Shader CreateKernelFromCache(const ShaderParameters& params,
67 GLShader::ProgramResult result);
68
64 VAddr GetCpuAddr() const override { 69 VAddr GetCpuAddr() const override {
65 return cpu_addr; 70 return cpu_addr;
66 } 71 }
@@ -78,7 +83,7 @@ public:
78 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); 83 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
79 84
80private: 85private:
81 explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, 86 explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
82 GLShader::ProgramResult result); 87 GLShader::ProgramResult result);
83 88
84 // Geometry programs. These are needed because GLSL needs an input topology but it's not 89 // Geometry programs. These are needed because GLSL needs an input topology but it's not
@@ -104,7 +109,7 @@ private:
104 u8* host_ptr{}; 109 u8* host_ptr{};
105 VAddr cpu_addr{}; 110 VAddr cpu_addr{};
106 u64 unique_identifier{}; 111 u64 unique_identifier{};
107 Maxwell::ShaderProgram program_type{}; 112 ProgramType program_type{};
108 ShaderDiskCacheOpenGL& disk_cache; 113 ShaderDiskCacheOpenGL& disk_cache;
109 const PrecompiledPrograms& precompiled_programs; 114 const PrecompiledPrograms& precompiled_programs;
110 115
@@ -132,6 +137,9 @@ public:
132 /// Gets the current specified shader stage program 137 /// Gets the current specified shader stage program
133 Shader GetStageProgram(Maxwell::ShaderProgram program); 138 Shader GetStageProgram(Maxwell::ShaderProgram program);
134 139
140 /// Gets a compute kernel in the passed address
141 Shader GetComputeKernel(GPUVAddr code_addr);
142
135protected: 143protected:
136 // We do not have to flush this cache as things in it are never modified by us. 144 // We do not have to flush this cache as things in it are never modified by us.
137 void FlushObjectInner(const Shader& object) override {} 145 void FlushObjectInner(const Shader& object) override {}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 119073776..ffe26b241 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -37,7 +37,6 @@ using namespace std::string_literals;
37using namespace VideoCommon::Shader; 37using namespace VideoCommon::Shader;
38 38
39using Maxwell = Tegra::Engines::Maxwell3D::Regs; 39using Maxwell = Tegra::Engines::Maxwell3D::Regs;
40using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
41using Operation = const OperationNode&; 40using Operation = const OperationNode&;
42 41
43enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; 42enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@@ -162,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
162 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); 161 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
163} 162}
164 163
164constexpr bool IsVertexShader(ProgramType stage) {
165 return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
166}
167
165class GLSLDecompiler final { 168class GLSLDecompiler final {
166public: 169public:
167 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, 170 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage,
168 std::string suffix) 171 std::string suffix)
169 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} 172 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
170 173
@@ -248,25 +251,21 @@ public:
248 } 251 }
249 entries.clip_distances = ir.GetClipDistances(); 252 entries.clip_distances = ir.GetClipDistances();
250 entries.shader_viewport_layer_array = 253 entries.shader_viewport_layer_array =
251 stage == ShaderStage::Vertex && (ir.UsesLayer() || ir.UsesViewportIndex()); 254 IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex());
252 entries.shader_length = ir.GetLength(); 255 entries.shader_length = ir.GetLength();
253 return entries; 256 return entries;
254 } 257 }
255 258
256private: 259private:
257 using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
258 using OperationDecompilersArray =
259 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
260
261 void DeclareVertex() { 260 void DeclareVertex() {
262 if (stage != ShaderStage::Vertex) 261 if (!IsVertexShader(stage))
263 return; 262 return;
264 263
265 DeclareVertexRedeclarations(); 264 DeclareVertexRedeclarations();
266 } 265 }
267 266
268 void DeclareGeometry() { 267 void DeclareGeometry() {
269 if (stage != ShaderStage::Geometry) { 268 if (stage != ProgramType::Geometry) {
270 return; 269 return;
271 } 270 }
272 271
@@ -297,14 +296,14 @@ private:
297 break; 296 break;
298 } 297 }
299 } 298 }
300 if (stage != ShaderStage::Vertex || device.HasVertexViewportLayer()) { 299 if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) {
301 if (ir.UsesLayer()) { 300 if (ir.UsesLayer()) {
302 code.AddLine("int gl_Layer;"); 301 code.AddLine("int gl_Layer;");
303 } 302 }
304 if (ir.UsesViewportIndex()) { 303 if (ir.UsesViewportIndex()) {
305 code.AddLine("int gl_ViewportIndex;"); 304 code.AddLine("int gl_ViewportIndex;");
306 } 305 }
307 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderStage::Vertex && 306 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) &&
308 !device.HasVertexViewportLayer()) { 307 !device.HasVertexViewportLayer()) {
309 LOG_ERROR( 308 LOG_ERROR(
310 Render_OpenGL, 309 Render_OpenGL,
@@ -341,11 +340,16 @@ private:
341 } 340 }
342 341
343 void DeclareLocalMemory() { 342 void DeclareLocalMemory() {
344 if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { 343 // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
345 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; 344 // specialization time.
346 code.AddLine("float {}[{}];", GetLocalMemory(), element_count); 345 const u64 local_memory_size =
347 code.AddNewLine(); 346 stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
347 if (local_memory_size == 0) {
348 return;
348 } 349 }
350 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
351 code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
352 code.AddNewLine();
349 } 353 }
350 354
351 void DeclareInternalFlags() { 355 void DeclareInternalFlags() {
@@ -399,12 +403,12 @@ private:
399 const u32 location{GetGenericAttributeIndex(index)}; 403 const u32 location{GetGenericAttributeIndex(index)};
400 404
401 std::string name{GetInputAttribute(index)}; 405 std::string name{GetInputAttribute(index)};
402 if (stage == ShaderStage::Geometry) { 406 if (stage == ProgramType::Geometry) {
403 name = "gs_" + name + "[]"; 407 name = "gs_" + name + "[]";
404 } 408 }
405 409
406 std::string suffix; 410 std::string suffix;
407 if (stage == ShaderStage::Fragment) { 411 if (stage == ProgramType::Fragment) {
408 const auto input_mode{header.ps.GetAttributeUse(location)}; 412 const auto input_mode{header.ps.GetAttributeUse(location)};
409 if (skip_unused && input_mode == AttributeUse::Unused) { 413 if (skip_unused && input_mode == AttributeUse::Unused) {
410 return; 414 return;
@@ -416,7 +420,7 @@ private:
416 } 420 }
417 421
418 void DeclareOutputAttributes() { 422 void DeclareOutputAttributes() {
419 if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { 423 if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) {
420 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { 424 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
421 DeclareOutputAttribute(ToGenericAttribute(i)); 425 DeclareOutputAttribute(ToGenericAttribute(i));
422 } 426 }
@@ -538,7 +542,7 @@ private:
538 constexpr u32 element_stride{4}; 542 constexpr u32 element_stride{4};
539 const u32 address{generic_base + index * generic_stride + element * element_stride}; 543 const u32 address{generic_base + index * generic_stride + element * element_stride};
540 544
541 const bool declared{stage != ShaderStage::Fragment || 545 const bool declared{stage != ProgramType::Fragment ||
542 header.ps.GetAttributeUse(index) != AttributeUse::Unused}; 546 header.ps.GetAttributeUse(index) != AttributeUse::Unused};
543 const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; 547 const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
544 code.AddLine("case 0x{:x}: return {};", address, value); 548 code.AddLine("case 0x{:x}: return {};", address, value);
@@ -642,7 +646,7 @@ private:
642 } 646 }
643 647
644 if (const auto abuf = std::get_if<AbufNode>(&*node)) { 648 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
645 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, 649 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,
646 "Physical attributes in geometry shaders are not implemented"); 650 "Physical attributes in geometry shaders are not implemented");
647 if (abuf->IsPhysicalBuffer()) { 651 if (abuf->IsPhysicalBuffer()) {
648 return fmt::format("readPhysicalAttribute(ftou({}))", 652 return fmt::format("readPhysicalAttribute(ftou({}))",
@@ -697,6 +701,9 @@ private:
697 } 701 }
698 702
699 if (const auto lmem = std::get_if<LmemNode>(&*node)) { 703 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
704 if (stage == ProgramType::Compute) {
705 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
706 }
700 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); 707 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
701 } 708 }
702 709
@@ -726,7 +733,7 @@ private:
726 733
727 std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { 734 std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
728 const auto GeometryPass = [&](std::string_view name) { 735 const auto GeometryPass = [&](std::string_view name) {
729 if (stage == ShaderStage::Geometry && buffer) { 736 if (stage == ProgramType::Geometry && buffer) {
730 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games 737 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
731 // set an 0x80000000 index for those and the shader fails to build. Find out why 738 // set an 0x80000000 index for those and the shader fails to build. Find out why
732 // this happens and what's its intent. 739 // this happens and what's its intent.
@@ -738,10 +745,10 @@ private:
738 switch (attribute) { 745 switch (attribute) {
739 case Attribute::Index::Position: 746 case Attribute::Index::Position:
740 switch (stage) { 747 switch (stage) {
741 case ShaderStage::Geometry: 748 case ProgramType::Geometry:
742 return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), 749 return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
743 GetSwizzle(element)); 750 GetSwizzle(element));
744 case ShaderStage::Fragment: 751 case ProgramType::Fragment:
745 return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); 752 return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
746 default: 753 default:
747 UNREACHABLE(); 754 UNREACHABLE();
@@ -762,7 +769,7 @@ private:
762 // TODO(Subv): Find out what the values are for the first two elements when inside a 769 // TODO(Subv): Find out what the values are for the first two elements when inside a
763 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 770 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
764 // shader. 771 // shader.
765 ASSERT(stage == ShaderStage::Vertex); 772 ASSERT(IsVertexShader(stage));
766 switch (element) { 773 switch (element) {
767 case 2: 774 case 2:
768 // Config pack's first value is instance_id. 775 // Config pack's first value is instance_id.
@@ -774,7 +781,7 @@ private:
774 return "0"; 781 return "0";
775 case Attribute::Index::FrontFacing: 782 case Attribute::Index::FrontFacing:
776 // TODO(Subv): Find out what the values are for the other elements. 783 // TODO(Subv): Find out what the values are for the other elements.
777 ASSERT(stage == ShaderStage::Fragment); 784 ASSERT(stage == ProgramType::Fragment);
778 switch (element) { 785 switch (element) {
779 case 3: 786 case 3:
780 return "itof(gl_FrontFacing ? -1 : 0)"; 787 return "itof(gl_FrontFacing ? -1 : 0)";
@@ -796,7 +803,7 @@ private:
796 return value; 803 return value;
797 } 804 }
798 // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders 805 // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
799 const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; 806 const std::string precise = stage != ProgramType::Fragment ? "precise " : "";
800 807
801 const std::string temporary = code.GenerateTemporary(); 808 const std::string temporary = code.GenerateTemporary();
802 code.AddLine("{}float {} = {};", precise, temporary, value); 809 code.AddLine("{}float {} = {};", precise, temporary, value);
@@ -831,12 +838,12 @@ private:
831 UNIMPLEMENTED(); 838 UNIMPLEMENTED();
832 return {}; 839 return {};
833 case 1: 840 case 1:
834 if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { 841 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
835 return {}; 842 return {};
836 } 843 }
837 return std::make_pair("gl_Layer", true); 844 return std::make_pair("gl_Layer", true);
838 case 2: 845 case 2:
839 if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { 846 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
840 return {}; 847 return {};
841 } 848 }
842 return std::make_pair("gl_ViewportIndex", true); 849 return std::make_pair("gl_ViewportIndex", true);
@@ -1073,6 +1080,9 @@ private:
1073 target = result->first; 1080 target = result->first;
1074 is_integer = result->second; 1081 is_integer = result->second;
1075 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { 1082 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1083 if (stage == ProgramType::Compute) {
1084 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
1085 }
1076 target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); 1086 target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
1077 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1087 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1078 const std::string real = Visit(gmem->GetRealAddress()); 1088 const std::string real = Visit(gmem->GetRealAddress());
@@ -1400,14 +1410,10 @@ private:
1400 return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); 1410 return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
1401 } 1411 }
1402 1412
1403 std::string LogicalAll2(Operation operation) { 1413 std::string LogicalAnd2(Operation operation) {
1404 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); 1414 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
1405 } 1415 }
1406 1416
1407 std::string LogicalAny2(Operation operation) {
1408 return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
1409 }
1410
1411 template <bool with_nan> 1417 template <bool with_nan>
1412 std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { 1418 std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
1413 const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, 1419 const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
@@ -1630,7 +1636,7 @@ private:
1630 } 1636 }
1631 1637
1632 std::string Exit(Operation operation) { 1638 std::string Exit(Operation operation) {
1633 if (stage != ShaderStage::Fragment) { 1639 if (stage != ProgramType::Fragment) {
1634 code.AddLine("return;"); 1640 code.AddLine("return;");
1635 return {}; 1641 return {};
1636 } 1642 }
@@ -1681,7 +1687,7 @@ private:
1681 } 1687 }
1682 1688
1683 std::string EmitVertex(Operation operation) { 1689 std::string EmitVertex(Operation operation) {
1684 ASSERT_MSG(stage == ShaderStage::Geometry, 1690 ASSERT_MSG(stage == ProgramType::Geometry,
1685 "EmitVertex is expected to be used in a geometry shader."); 1691 "EmitVertex is expected to be used in a geometry shader.");
1686 1692
1687 // If a geometry shader is attached, it will always flip (it's the last stage before 1693 // If a geometry shader is attached, it will always flip (it's the last stage before
@@ -1692,7 +1698,7 @@ private:
1692 } 1698 }
1693 1699
1694 std::string EndPrimitive(Operation operation) { 1700 std::string EndPrimitive(Operation operation) {
1695 ASSERT_MSG(stage == ShaderStage::Geometry, 1701 ASSERT_MSG(stage == ProgramType::Geometry,
1696 "EndPrimitive is expected to be used in a geometry shader."); 1702 "EndPrimitive is expected to be used in a geometry shader.");
1697 1703
1698 code.AddLine("EndPrimitive();"); 1704 code.AddLine("EndPrimitive();");
@@ -1714,7 +1720,7 @@ private:
1714 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; 1720 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
1715 } 1721 }
1716 1722
1717 static constexpr OperationDecompilersArray operation_decompilers = { 1723 static constexpr std::array operation_decompilers = {
1718 &GLSLDecompiler::Assign, 1724 &GLSLDecompiler::Assign,
1719 1725
1720 &GLSLDecompiler::Select, 1726 &GLSLDecompiler::Select,
@@ -1798,8 +1804,7 @@ private:
1798 &GLSLDecompiler::LogicalXor, 1804 &GLSLDecompiler::LogicalXor,
1799 &GLSLDecompiler::LogicalNegate, 1805 &GLSLDecompiler::LogicalNegate,
1800 &GLSLDecompiler::LogicalPick2, 1806 &GLSLDecompiler::LogicalPick2,
1801 &GLSLDecompiler::LogicalAll2, 1807 &GLSLDecompiler::LogicalAnd2,
1802 &GLSLDecompiler::LogicalAny2,
1803 1808
1804 &GLSLDecompiler::LogicalLessThan<Type::Float>, 1809 &GLSLDecompiler::LogicalLessThan<Type::Float>,
1805 &GLSLDecompiler::LogicalEqual<Type::Float>, 1810 &GLSLDecompiler::LogicalEqual<Type::Float>,
@@ -1863,6 +1868,7 @@ private:
1863 &GLSLDecompiler::WorkGroupId<1>, 1868 &GLSLDecompiler::WorkGroupId<1>,
1864 &GLSLDecompiler::WorkGroupId<2>, 1869 &GLSLDecompiler::WorkGroupId<2>,
1865 }; 1870 };
1871 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1866 1872
1867 std::string GetRegister(u32 index) const { 1873 std::string GetRegister(u32 index) const {
1868 return GetDeclarationWithSuffix(index, "gpr"); 1874 return GetDeclarationWithSuffix(index, "gpr");
@@ -1927,7 +1933,7 @@ private:
1927 } 1933 }
1928 1934
1929 u32 GetNumPhysicalInputAttributes() const { 1935 u32 GetNumPhysicalInputAttributes() const {
1930 return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); 1936 return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
1931 } 1937 }
1932 1938
1933 u32 GetNumPhysicalAttributes() const { 1939 u32 GetNumPhysicalAttributes() const {
@@ -1940,7 +1946,7 @@ private:
1940 1946
1941 const Device& device; 1947 const Device& device;
1942 const ShaderIR& ir; 1948 const ShaderIR& ir;
1943 const ShaderStage stage; 1949 const ProgramType stage;
1944 const std::string suffix; 1950 const std::string suffix;
1945 const Header header; 1951 const Header header;
1946 1952
@@ -1971,7 +1977,7 @@ std::string GetCommonDeclarations() {
1971 MAX_CONSTBUFFER_ELEMENTS); 1977 MAX_CONSTBUFFER_ELEMENTS);
1972} 1978}
1973 1979
1974ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, 1980ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
1975 const std::string& suffix) { 1981 const std::string& suffix) {
1976 GLSLDecompiler decompiler(device, ir, stage, suffix); 1982 GLSLDecompiler decompiler(device, ir, stage, suffix);
1977 decompiler.Decompile(); 1983 decompiler.Decompile();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 02586736d..2ea02f5bf 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -12,14 +12,26 @@
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/shader/shader_ir.h" 13#include "video_core/shader/shader_ir.h"
14 14
15namespace OpenGL {
16class Device;
17}
18
19namespace VideoCommon::Shader { 15namespace VideoCommon::Shader {
20class ShaderIR; 16class ShaderIR;
21} 17}
22 18
19namespace OpenGL {
20
21class Device;
22
23enum class ProgramType : u32 {
24 VertexA = 0,
25 VertexB = 1,
26 TessellationControl = 2,
27 TessellationEval = 3,
28 Geometry = 4,
29 Fragment = 5,
30 Compute = 6
31};
32
33} // namespace OpenGL
34
23namespace OpenGL::GLShader { 35namespace OpenGL::GLShader {
24 36
25struct ShaderEntries; 37struct ShaderEntries;
@@ -85,6 +97,6 @@ struct ShaderEntries {
85std::string GetCommonDeclarations(); 97std::string GetCommonDeclarations();
86 98
87ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 99ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
88 Maxwell::ShaderStage stage, const std::string& suffix); 100 ProgramType stage, const std::string& suffix);
89 101
90} // namespace OpenGL::GLShader 102} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 7893d1e26..969fe9ced 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
51 51
52} // namespace 52} // namespace
53 53
54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, 54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
55 u32 program_code_size, u32 program_code_size_b, 55 u32 program_code_size, u32 program_code_size_b,
56 ProgramCode program_code, ProgramCode program_code_b) 56 ProgramCode program_code, ProgramCode program_code_b)
57 : unique_identifier{unique_identifier}, program_type{program_type}, 57 : unique_identifier{unique_identifier}, program_type{program_type},
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 4f296dda6..cc8bbd61e 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -18,7 +18,6 @@
18#include "common/assert.h" 18#include "common/assert.h"
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "core/file_sys/vfs_vector.h" 20#include "core/file_sys/vfs_vector.h"
21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_opengl/gl_shader_gen.h" 21#include "video_core/renderer_opengl/gl_shader_gen.h"
23 22
24namespace Core { 23namespace Core {
@@ -34,14 +33,11 @@ namespace OpenGL {
34struct ShaderDiskCacheUsage; 33struct ShaderDiskCacheUsage;
35struct ShaderDiskCacheDump; 34struct ShaderDiskCacheDump;
36 35
37using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
38
39using ProgramCode = std::vector<u64>; 36using ProgramCode = std::vector<u64>;
40using Maxwell = Tegra::Engines::Maxwell3D::Regs; 37using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
41
42using TextureBufferUsage = std::bitset<64>; 38using TextureBufferUsage = std::bitset<64>;
43 39
44/// Allocated bindings used by an OpenGL shader program. 40/// Allocated bindings used by an OpenGL shader program
45struct BaseBindings { 41struct BaseBindings {
46 u32 cbuf{}; 42 u32 cbuf{};
47 u32 gmem{}; 43 u32 gmem{};
@@ -126,7 +122,7 @@ namespace OpenGL {
126/// Describes a shader how it's used by the guest GPU 122/// Describes a shader how it's used by the guest GPU
127class ShaderDiskCacheRaw { 123class ShaderDiskCacheRaw {
128public: 124public:
129 explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, 125 explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
130 u32 program_code_size, u32 program_code_size_b, 126 u32 program_code_size, u32 program_code_size_b,
131 ProgramCode program_code, ProgramCode program_code_b); 127 ProgramCode program_code, ProgramCode program_code_b);
132 ShaderDiskCacheRaw(); 128 ShaderDiskCacheRaw();
@@ -141,30 +137,13 @@ public:
141 } 137 }
142 138
143 bool HasProgramA() const { 139 bool HasProgramA() const {
144 return program_type == Maxwell::ShaderProgram::VertexA; 140 return program_type == ProgramType::VertexA;
145 } 141 }
146 142
147 Maxwell::ShaderProgram GetProgramType() const { 143 ProgramType GetProgramType() const {
148 return program_type; 144 return program_type;
149 } 145 }
150 146
151 Maxwell::ShaderStage GetProgramStage() const {
152 switch (program_type) {
153 case Maxwell::ShaderProgram::VertexA:
154 case Maxwell::ShaderProgram::VertexB:
155 return Maxwell::ShaderStage::Vertex;
156 case Maxwell::ShaderProgram::TesselationControl:
157 return Maxwell::ShaderStage::TesselationControl;
158 case Maxwell::ShaderProgram::TesselationEval:
159 return Maxwell::ShaderStage::TesselationEval;
160 case Maxwell::ShaderProgram::Geometry:
161 return Maxwell::ShaderStage::Geometry;
162 case Maxwell::ShaderProgram::Fragment:
163 return Maxwell::ShaderStage::Fragment;
164 }
165 UNREACHABLE();
166 }
167
168 const ProgramCode& GetProgramCode() const { 147 const ProgramCode& GetProgramCode() const {
169 return program_code; 148 return program_code;
170 } 149 }
@@ -175,7 +154,7 @@ public:
175 154
176private: 155private:
177 u64 unique_identifier{}; 156 u64 unique_identifier{};
178 Maxwell::ShaderProgram program_type{}; 157 ProgramType program_type{};
179 u32 program_code_size{}; 158 u32 program_code_size{};
180 u32 program_code_size_b{}; 159 u32 program_code_size_b{};
181 160
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index f9ee8429e..3a8d9e1da 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D;
14using VideoCommon::Shader::ProgramCode; 14using VideoCommon::Shader::ProgramCode;
15using VideoCommon::Shader::ShaderIR; 15using VideoCommon::Shader::ShaderIR;
16 16
17static constexpr u32 PROGRAM_OFFSET{10}; 17static constexpr u32 PROGRAM_OFFSET = 10;
18static constexpr u32 COMPUTE_OFFSET = 0;
18 19
19ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { 20ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
20 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 21 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
@@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
29}; 30};
30 31
31)"; 32)";
32 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
33 ProgramResult program =
34 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
35 33
34 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
35 const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
36 ProgramResult program = Decompile(device, program_ir, stage, "vertex");
36 out += program.first; 37 out += program.first;
37 38
38 if (setup.IsDualProgram()) { 39 if (setup.IsDualProgram()) {
39 const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); 40 const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b);
40 ProgramResult program_b = 41 ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
41 Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
42
43 out += program_b.first; 42 out += program_b.first;
44 } 43 }
45 44
@@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
80}; 79};
81 80
82)"; 81)";
82
83 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); 83 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
84 ProgramResult program = 84 ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
85 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
86 out += program.first; 85 out += program.first;
87 86
88 out += R"( 87 out += R"(
@@ -116,9 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
116 115
117)"; 116)";
118 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); 117 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
119 ProgramResult program = 118 ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
120 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
121
122 out += program.first; 119 out += program.first;
123 120
124 out += R"( 121 out += R"(
@@ -130,4 +127,22 @@ void main() {
130 return {std::move(out), std::move(program.second)}; 127 return {std::move(out), std::move(program.second)};
131} 128}
132 129
130ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
131 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
132
133 std::string out = "// Shader Unique Id: CS" + id + "\n\n";
134 out += GetCommonDeclarations();
135
136 const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a);
137 ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
138 out += program.first;
139
140 out += R"(
141void main() {
142 execute_compute();
143}
144)";
145 return {std::move(out), std::move(program.second)};
146}
147
133} // namespace OpenGL::GLShader 148} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 7cbc590f8..3833e88ab 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -54,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se
54/// Generates the GLSL fragment shader program source code for the given FS program 54/// Generates the GLSL fragment shader program source code for the given FS program
55ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); 55ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
56 56
57/// Generates the GLSL compute shader program source code for the given CS program
58ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
59
57} // namespace OpenGL::GLShader 60} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 5f3fe067e..9e74eda0d 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -10,21 +10,25 @@
10 10
11namespace OpenGL::GLShader { 11namespace OpenGL::GLShader {
12 12
13GLuint LoadShader(const char* source, GLenum type) { 13namespace {
14 const char* debug_type; 14const char* GetStageDebugName(GLenum type) {
15 switch (type) { 15 switch (type) {
16 case GL_VERTEX_SHADER: 16 case GL_VERTEX_SHADER:
17 debug_type = "vertex"; 17 return "vertex";
18 break;
19 case GL_GEOMETRY_SHADER: 18 case GL_GEOMETRY_SHADER:
20 debug_type = "geometry"; 19 return "geometry";
21 break;
22 case GL_FRAGMENT_SHADER: 20 case GL_FRAGMENT_SHADER:
23 debug_type = "fragment"; 21 return "fragment";
24 break; 22 case GL_COMPUTE_SHADER:
25 default: 23 return "compute";
26 UNREACHABLE();
27 } 24 }
25 UNIMPLEMENTED();
26 return "unknown";
27}
28} // Anonymous namespace
29
30GLuint LoadShader(const char* source, GLenum type) {
31 const char* debug_type = GetStageDebugName(type);
28 const GLuint shader_id = glCreateShader(type); 32 const GLuint shader_id = glCreateShader(type);
29 glShaderSource(shader_id, 1, &source, nullptr); 33 glShaderSource(shader_id, 1, &source, nullptr);
30 LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); 34 LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 0eae98afe..f4777d0b0 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -165,6 +165,25 @@ OpenGLState::OpenGLState() {
165 alpha_test.ref = 0.0f; 165 alpha_test.ref = 0.0f;
166} 166}
167 167
168void OpenGLState::SetDefaultViewports() {
169 for (auto& item : viewports) {
170 item.x = 0;
171 item.y = 0;
172 item.width = 0;
173 item.height = 0;
174 item.depth_range_near = 0.0f;
175 item.depth_range_far = 1.0f;
176 item.scissor.enabled = false;
177 item.scissor.x = 0;
178 item.scissor.y = 0;
179 item.scissor.width = 0;
180 item.scissor.height = 0;
181 }
182
183 depth_clamp.far_plane = false;
184 depth_clamp.near_plane = false;
185}
186
168void OpenGLState::ApplyDefaultState() { 187void OpenGLState::ApplyDefaultState() {
169 glEnable(GL_BLEND); 188 glEnable(GL_BLEND);
170 glDisable(GL_FRAMEBUFFER_SRGB); 189 glDisable(GL_FRAMEBUFFER_SRGB);
@@ -526,7 +545,7 @@ void OpenGLState::ApplySamplers() const {
526 } 545 }
527} 546}
528 547
529void OpenGLState::Apply() const { 548void OpenGLState::Apply() {
530 MICROPROFILE_SCOPE(OpenGL_State); 549 MICROPROFILE_SCOPE(OpenGL_State);
531 ApplyFramebufferState(); 550 ApplyFramebufferState();
532 ApplyVertexArrayState(); 551 ApplyVertexArrayState();
@@ -536,19 +555,31 @@ void OpenGLState::Apply() const {
536 ApplyPointSize(); 555 ApplyPointSize();
537 ApplyFragmentColorClamp(); 556 ApplyFragmentColorClamp();
538 ApplyMultisample(); 557 ApplyMultisample();
558 if (dirty.color_mask) {
559 ApplyColorMask();
560 dirty.color_mask = false;
561 }
539 ApplyDepthClamp(); 562 ApplyDepthClamp();
540 ApplyColorMask();
541 ApplyViewport(); 563 ApplyViewport();
542 ApplyStencilTest(); 564 if (dirty.stencil_state) {
565 ApplyStencilTest();
566 dirty.stencil_state = false;
567 }
543 ApplySRgb(); 568 ApplySRgb();
544 ApplyCulling(); 569 ApplyCulling();
545 ApplyDepth(); 570 ApplyDepth();
546 ApplyPrimitiveRestart(); 571 ApplyPrimitiveRestart();
547 ApplyBlending(); 572 if (dirty.blend_state) {
573 ApplyBlending();
574 dirty.blend_state = false;
575 }
548 ApplyLogicOp(); 576 ApplyLogicOp();
549 ApplyTextures(); 577 ApplyTextures();
550 ApplySamplers(); 578 ApplySamplers();
551 ApplyPolygonOffset(); 579 if (dirty.polygon_offset) {
580 ApplyPolygonOffset();
581 dirty.polygon_offset = false;
582 }
552 ApplyAlphaTest(); 583 ApplyAlphaTest();
553} 584}
554 585
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b0140495d..fdf9a8a12 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -195,8 +195,9 @@ public:
195 s_rgb_used = false; 195 s_rgb_used = false;
196 } 196 }
197 197
198 void SetDefaultViewports();
198 /// Apply this state as the current OpenGL state 199 /// Apply this state as the current OpenGL state
199 void Apply() const; 200 void Apply();
200 201
201 void ApplyFramebufferState() const; 202 void ApplyFramebufferState() const;
202 void ApplyVertexArrayState() const; 203 void ApplyVertexArrayState() const;
@@ -237,11 +238,41 @@ public:
237 /// Viewport does not affects glClearBuffer so emulate viewport using scissor test 238 /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
238 void EmulateViewportWithScissor(); 239 void EmulateViewportWithScissor();
239 240
241 void MarkDirtyBlendState() {
242 dirty.blend_state = true;
243 }
244
245 void MarkDirtyStencilState() {
246 dirty.stencil_state = true;
247 }
248
249 void MarkDirtyPolygonOffset() {
250 dirty.polygon_offset = true;
251 }
252
253 void MarkDirtyColorMask() {
254 dirty.color_mask = true;
255 }
256
257 void AllDirty() {
258 dirty.blend_state = true;
259 dirty.stencil_state = true;
260 dirty.polygon_offset = true;
261 dirty.color_mask = true;
262 }
263
240private: 264private:
241 static OpenGLState cur_state; 265 static OpenGLState cur_state;
242 266
243 // Workaround for sRGB problems caused by QT not supporting srgb output 267 // Workaround for sRGB problems caused by QT not supporting srgb output
244 static bool s_rgb_used; 268 static bool s_rgb_used;
269 struct {
270 bool blend_state;
271 bool stencil_state;
272 bool viewport_state;
273 bool polygon_offset;
274 bool color_mask;
275 } dirty{};
245}; 276};
246 277
247} // namespace OpenGL 278} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 6ecb02c45..408332f90 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -484,11 +484,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
484 const auto& dst_params{dst_view->GetSurfaceParams()}; 484 const auto& dst_params{dst_view->GetSurfaceParams()};
485 485
486 OpenGLState prev_state{OpenGLState::GetCurState()}; 486 OpenGLState prev_state{OpenGLState::GetCurState()};
487 SCOPE_EXIT({ prev_state.Apply(); }); 487 SCOPE_EXIT({
488 prev_state.AllDirty();
489 prev_state.Apply();
490 });
488 491
489 OpenGLState state; 492 OpenGLState state;
490 state.draw.read_framebuffer = src_framebuffer.handle; 493 state.draw.read_framebuffer = src_framebuffer.handle;
491 state.draw.draw_framebuffer = dst_framebuffer.handle; 494 state.draw.draw_framebuffer = dst_framebuffer.handle;
495 state.AllDirty();
492 state.Apply(); 496 state.Apply();
493 497
494 u32 buffers{}; 498 u32 buffers{};
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 9ecdddb0d..a05cef3b9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -108,6 +108,7 @@ void RendererOpenGL::SwapBuffers(
108 108
109 // Maintain the rasterizer's state as a priority 109 // Maintain the rasterizer's state as a priority
110 OpenGLState prev_state = OpenGLState::GetCurState(); 110 OpenGLState prev_state = OpenGLState::GetCurState();
111 state.AllDirty();
111 state.Apply(); 112 state.Apply();
112 113
113 if (framebuffer) { 114 if (framebuffer) {
@@ -140,6 +141,7 @@ void RendererOpenGL::SwapBuffers(
140 system.GetPerfStats().BeginSystemFrame(); 141 system.GetPerfStats().BeginSystemFrame();
141 142
142 // Restore the rasterizer state 143 // Restore the rasterizer state
144 prev_state.AllDirty();
143 prev_state.Apply(); 145 prev_state.Apply();
144} 146}
145 147
@@ -206,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() {
206 // Link shaders and get variable locations 208 // Link shaders and get variable locations
207 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); 209 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
208 state.draw.shader_program = shader.handle; 210 state.draw.shader_program = shader.handle;
211 state.AllDirty();
209 state.Apply(); 212 state.Apply();
210 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); 213 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
211 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); 214 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
@@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
338 // Workaround brigthness problems in SMO by enabling sRGB in the final output 341 // Workaround brigthness problems in SMO by enabling sRGB in the final output
339 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 342 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
340 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); 343 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
344 state.AllDirty();
341 state.Apply(); 345 state.Apply();
342 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); 346 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
343 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 347 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
344 // Restore default state 348 // Restore default state
345 state.framebuffer_srgb.enabled = false; 349 state.framebuffer_srgb.enabled = false;
346 state.texture_units[0].texture = 0; 350 state.texture_units[0].texture = 0;
351 state.AllDirty();
347 state.Apply(); 352 state.Apply();
348 // Clear sRGB state for the next frame 353 // Clear sRGB state for the next frame
349 OpenGLState::ClearsRGBUsed(); 354 OpenGLState::ClearsRGBUsed();
@@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() {
388 GLuint old_read_fb = state.draw.read_framebuffer; 393 GLuint old_read_fb = state.draw.read_framebuffer;
389 GLuint old_draw_fb = state.draw.draw_framebuffer; 394 GLuint old_draw_fb = state.draw.draw_framebuffer;
390 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; 395 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
396 state.AllDirty();
391 state.Apply(); 397 state.Apply();
392 398
393 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; 399 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() {
407 screenshot_framebuffer.Release(); 413 screenshot_framebuffer.Release();
408 state.draw.read_framebuffer = old_read_fb; 414 state.draw.read_framebuffer = old_read_fb;
409 state.draw.draw_framebuffer = old_draw_fb; 415 state.draw.draw_framebuffer = old_draw_fb;
416 state.AllDirty();
410 state.Apply(); 417 state.Apply();
411 glDeleteRenderbuffers(1, &renderbuffer); 418 glDeleteRenderbuffers(1, &renderbuffer);
412 419
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 9b2d8e987..d267712c9 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -205,10 +205,6 @@ public:
205 } 205 }
206 206
207private: 207private:
208 using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
209 using OperationDecompilersArray =
210 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
211
212 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); 208 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
213 209
214 void AllocateBindings() { 210 void AllocateBindings() {
@@ -804,12 +800,7 @@ private:
804 return {}; 800 return {};
805 } 801 }
806 802
807 Id LogicalAll2(Operation operation) { 803 Id LogicalAnd2(Operation operation) {
808 UNIMPLEMENTED();
809 return {};
810 }
811
812 Id LogicalAny2(Operation operation) {
813 UNIMPLEMENTED(); 804 UNIMPLEMENTED();
814 return {}; 805 return {};
815 } 806 }
@@ -1206,7 +1197,7 @@ private:
1206 return {}; 1197 return {};
1207 } 1198 }
1208 1199
1209 static constexpr OperationDecompilersArray operation_decompilers = { 1200 static constexpr std::array operation_decompilers = {
1210 &SPIRVDecompiler::Assign, 1201 &SPIRVDecompiler::Assign,
1211 1202
1212 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, 1203 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
@@ -1291,8 +1282,7 @@ private:
1291 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, 1282 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
1292 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, 1283 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
1293 &SPIRVDecompiler::LogicalPick2, 1284 &SPIRVDecompiler::LogicalPick2,
1294 &SPIRVDecompiler::LogicalAll2, 1285 &SPIRVDecompiler::LogicalAnd2,
1295 &SPIRVDecompiler::LogicalAny2,
1296 1286
1297 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, 1287 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
1298 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, 1288 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
@@ -1357,6 +1347,7 @@ private:
1357 &SPIRVDecompiler::WorkGroupId<1>, 1347 &SPIRVDecompiler::WorkGroupId<1>,
1358 &SPIRVDecompiler::WorkGroupId<2>, 1348 &SPIRVDecompiler::WorkGroupId<2>,
1359 }; 1349 };
1350 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1360 1351
1361 const VKDevice& device; 1352 const VKDevice& device;
1362 const ShaderIR& ir; 1353 const ShaderIR& ir;
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 29c8895c5..afffd157f 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -46,12 +46,12 @@ void ShaderIR::Decode() {
46 coverage_end = shader_info.end; 46 coverage_end = shader_info.end;
47 if (shader_info.decompilable) { 47 if (shader_info.decompilable) {
48 disable_flow_stack = true; 48 disable_flow_stack = true;
49 const auto insert_block = ([this](NodeBlock& nodes, u32 label) { 49 const auto insert_block = [this](NodeBlock& nodes, u32 label) {
50 if (label == exit_branch) { 50 if (label == exit_branch) {
51 return; 51 return;
52 } 52 }
53 basic_blocks.insert({label, nodes}); 53 basic_blocks.insert({label, nodes});
54 }); 54 };
55 const auto& blocks = shader_info.blocks; 55 const auto& blocks = shader_info.blocks;
56 NodeBlock current_block; 56 NodeBlock current_block;
57 u32 current_label = exit_branch; 57 u32 current_label = exit_branch;
@@ -103,7 +103,7 @@ void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
103} 103}
104 104
105void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { 105void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
106 const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { 106 const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
107 Node result = n; 107 Node result = n;
108 if (cond.cc != ConditionCode::T) { 108 if (cond.cc != ConditionCode::T) {
109 result = Conditional(GetConditionCode(cond.cc), {result}); 109 result = Conditional(GetConditionCode(cond.cc), {result});
@@ -117,7 +117,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
117 result = Conditional(GetPredicate(pred, is_neg), {result}); 117 result = Conditional(GetPredicate(pred, is_neg), {result});
118 } 118 }
119 return result; 119 return result;
120 }); 120 };
121 if (block.branch.address < 0) { 121 if (block.branch.address < 0) {
122 if (block.branch.kills) { 122 if (block.branch.kills) {
123 Node n = Operation(OperationCode::Discard); 123 Node n = Operation(OperationCode::Discard);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index 4587dbd00..a82a6a15c 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -23,38 +23,51 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); 23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); 24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
25 25
26 Node op_b = [&]() { 26 Tegra::Shader::PredCondition cond{};
27 switch (opcode->get().GetId()) { 27 bool h_and{};
28 case OpCode::Id::HSETP2_R: 28 Node op_b{};
29 return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, 29 switch (opcode->get().GetId()) {
30 instr.hsetp2.negate_b); 30 case OpCode::Id::HSETP2_C:
31 default: 31 cond = instr.hsetp2.cbuf_and_imm.cond;
32 UNREACHABLE(); 32 h_and = instr.hsetp2.cbuf_and_imm.h_and;
33 return Immediate(0); 33 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
34 } 34 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
35 }(); 35 break;
36 op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); 36 case OpCode::Id::HSETP2_IMM:
37 37 cond = instr.hsetp2.cbuf_and_imm.cond;
38 // We can't use the constant predicate as destination. 38 h_and = instr.hsetp2.cbuf_and_imm.h_and;
39 ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); 39 op_b = UnpackHalfImmediate(instr, true);
40 40 break;
41 const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); 41 case OpCode::Id::HSETP2_R:
42 cond = instr.hsetp2.reg.cond;
43 h_and = instr.hsetp2.reg.h_and;
44 op_b =
45 UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b,
46 instr.hsetp2.reg.negate_b),
47 instr.hsetp2.reg.type_b);
48 break;
49 default:
50 UNREACHABLE();
51 op_b = Immediate(0);
52 }
42 53
43 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); 54 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
44 const OperationCode pair_combiner = 55 const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
45 instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
46
47 const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
48 const Node first_pred = Operation(pair_combiner, comparison);
49 56
50 // Set the primary predicate to the result of Predicate OP SecondPredicate 57 const auto Write = [&](u64 dest, Node src) {
51 const Node value = Operation(combiner, first_pred, second_pred); 58 SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39));
52 SetPredicate(bb, instr.hsetp2.pred3, value); 59 };
53 60
54 if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 61 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
55 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled 62 const u64 first = instr.hsetp2.pred0;
56 const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); 63 const u64 second = instr.hsetp2.pred3;
57 SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); 64 if (h_and) {
65 const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
66 Write(first, joined);
67 Write(second, Operation(OperationCode::LogicalNegate, joined));
68 } else {
69 Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u)));
70 Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u)));
58 } 71 }
59 72
60 return pc; 73 return pc;
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ab207a33b..ed108bea8 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
95 const Node op_b = 95 const Node op_b =
96 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); 96 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
97 97
98 SetTemporal(bb, 0, op_a); 98 SetTemporary(bb, 0, op_a);
99 SetTemporal(bb, 1, op_b); 99 SetTemporary(bb, 1, op_b);
100 SetRegister(bb, instr.gpr0, GetTemporal(0)); 100 SetRegister(bb, instr.gpr0, GetTemporary(0));
101 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); 101 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
102 break; 102 break;
103 } 103 }
104 default: 104 default:
@@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
136 } 136 }
137 }(); 137 }();
138 for (u32 i = 0; i < count; ++i) 138 for (u32 i = 0; i < count; ++i)
139 SetTemporal(bb, i, GetLmem(i * 4)); 139 SetTemporary(bb, i, GetLmem(i * 4));
140 for (u32 i = 0; i < count; ++i) 140 for (u32 i = 0; i < count; ++i)
141 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 141 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
142 break; 142 break;
143 } 143 }
144 default: 144 default:
@@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
172 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); 172 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
173 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 173 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
174 174
175 SetTemporal(bb, i, gmem); 175 SetTemporary(bb, i, gmem);
176 } 176 }
177 for (u32 i = 0; i < count; ++i) { 177 for (u32 i = 0; i < count; ++i) {
178 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 178 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
179 } 179 }
180 break; 180 break;
181 } 181 }
@@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
253 TrackAndGetGlobalMemory(bb, instr, true); 253 TrackAndGetGlobalMemory(bb, instr, true);
254 254
255 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} 255 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
256 SetTemporal(bb, 0, real_address_base); 256 SetTemporary(bb, 0, real_address_base);
257 257
258 const u32 count = GetUniformTypeElementsCount(type); 258 const u32 count = GetUniformTypeElementsCount(type);
259 for (u32 i = 0; i < count; ++i) { 259 for (u32 i = 0; i < count; ++i) {
260 SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); 260 SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
261 } 261 }
262 for (u32 i = 0; i < count; ++i) { 262 for (u32 i = 0; i < count; ++i) {
263 const Node it_offset = Immediate(i * 4); 263 const Node it_offset = Immediate(i * 4);
@@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
265 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); 265 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
266 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 266 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
267 267
268 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); 268 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
269 } 269 }
270 break; 270 break;
271 } 271 }
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index e1ee5c190..0b934a069 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
181 const Node value = 181 const Node value =
182 Operation(OperationCode::TextureQueryDimensions, meta, 182 Operation(OperationCode::TextureQueryDimensions, meta,
183 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); 183 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
184 SetTemporal(bb, indexer++, value); 184 SetTemporary(bb, indexer++, value);
185 } 185 }
186 for (u32 i = 0; i < indexer; ++i) { 186 for (u32 i = 0; i < indexer; ++i) {
187 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 187 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
188 } 188 }
189 break; 189 break;
190 } 190 }
@@ -238,10 +238,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
238 auto params = coords; 238 auto params = coords;
239 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; 239 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
240 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); 240 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
241 SetTemporal(bb, indexer++, value); 241 SetTemporary(bb, indexer++, value);
242 } 242 }
243 for (u32 i = 0; i < indexer; ++i) { 243 for (u32 i = 0; i < indexer; ++i) {
244 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 244 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
245 } 245 }
246 break; 246 break;
247 } 247 }
@@ -336,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
336 // Skip disabled components 336 // Skip disabled components
337 continue; 337 continue;
338 } 338 }
339 SetTemporal(bb, dest_elem++, components[elem]); 339 SetTemporary(bb, dest_elem++, components[elem]);
340 } 340 }
341 // After writing values in temporals, move them to the real registers 341 // After writing values in temporals, move them to the real registers
342 for (u32 i = 0; i < dest_elem; ++i) { 342 for (u32 i = 0; i < dest_elem; ++i) {
343 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 343 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
344 } 344 }
345} 345}
346 346
@@ -353,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
353 for (u32 component = 0; component < 4; ++component) { 353 for (u32 component = 0; component < 4; ++component) {
354 if (!instr.texs.IsComponentEnabled(component)) 354 if (!instr.texs.IsComponentEnabled(component))
355 continue; 355 continue;
356 SetTemporal(bb, dest_elem++, components[component]); 356 SetTemporary(bb, dest_elem++, components[component]);
357 } 357 }
358 358
359 for (u32 i = 0; i < dest_elem; ++i) { 359 for (u32 i = 0; i < dest_elem; ++i) {
360 if (i < 2) { 360 if (i < 2) {
361 // Write the first two swizzle components to gpr0 and gpr0+1 361 // Write the first two swizzle components to gpr0 and gpr0+1
362 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); 362 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
363 } else { 363 } else {
364 ASSERT(instr.texs.HasTwoDestinations()); 364 ASSERT(instr.texs.HasTwoDestinations());
365 // Write the rest of the swizzle components to gpr28 and gpr28+1 365 // Write the rest of the swizzle components to gpr28 and gpr28+1
366 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); 366 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
367 } 367 }
368 } 368 }
369} 369}
@@ -391,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
391 return; 391 return;
392 } 392 }
393 393
394 SetTemporal(bb, 0, first_value); 394 SetTemporary(bb, 0, first_value);
395 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); 395 SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
396 396
397 SetRegister(bb, instr.gpr0, GetTemporal(0)); 397 SetRegister(bb, instr.gpr0, GetTemporary(0));
398 SetRegister(bb, instr.gpr28, GetTemporal(1)); 398 SetRegister(bb, instr.gpr28, GetTemporary(1));
399} 399}
400 400
401Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, 401Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 93dee77d1..206961909 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
73 if (is_psl) { 73 if (is_psl) {
74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); 74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
75 } 75 }
76 SetTemporal(bb, 0, product); 76 SetTemporary(bb, 0, product);
77 product = GetTemporal(0); 77 product = GetTemporary(0);
78 78
79 const Node original_c = op_c; 79 const Node original_c = op_c;
80 const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error 80 const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
@@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
98 } 98 }
99 }(); 99 }();
100 100
101 SetTemporal(bb, 1, op_c); 101 SetTemporary(bb, 1, op_c);
102 op_c = GetTemporal(1); 102 op_c = GetTemporary(1);
103 103
104 // TODO(Rodrigo): Use an appropiate sign for this operation 104 // TODO(Rodrigo): Use an appropiate sign for this operation
105 Node sum = Operation(OperationCode::IAdd, product, op_c); 105 Node sum = Operation(OperationCode::IAdd, product, op_c);
106 SetTemporal(bb, 2, sum); 106 SetTemporary(bb, 2, sum);
107 sum = GetTemporal(2); 107 sum = GetTemporary(2);
108 if (is_merge) { 108 if (is_merge) {
109 const Node a = BitfieldExtract(sum, 0, 16); 109 const Node a = BitfieldExtract(sum, 0, 16);
110 const Node b = 110 const Node b =
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 7427ed896..715184d67 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -101,8 +101,7 @@ enum class OperationCode {
101 LogicalXor, /// (bool a, bool b) -> bool 101 LogicalXor, /// (bool a, bool b) -> bool
102 LogicalNegate, /// (bool a) -> bool 102 LogicalNegate, /// (bool a) -> bool
103 LogicalPick2, /// (bool2 pair, uint index) -> bool 103 LogicalPick2, /// (bool2 pair, uint index) -> bool
104 LogicalAll2, /// (bool2 a) -> bool 104 LogicalAnd2, /// (bool2 a) -> bool
105 LogicalAny2, /// (bool2 a) -> bool
106 105
107 LogicalFLessThan, /// (float a, float b) -> bool 106 LogicalFLessThan, /// (float a, float b) -> bool
108 LogicalFEqual, /// (float a, float b) -> bool 107 LogicalFEqual, /// (float a, float b) -> bool
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 6fccbbba3..b3dcd291c 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -12,7 +12,7 @@
12namespace VideoCommon::Shader { 12namespace VideoCommon::Shader {
13 13
14Node Conditional(Node condition, std::vector<Node> code) { 14Node Conditional(Node condition, std::vector<Node> code) {
15 return MakeNode<ConditionalNode>(condition, std::move(code)); 15 return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
16} 16}
17 17
18Node Comment(std::string text) { 18Node Comment(std::string text) {
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 78bd1cf1e..5e91fe129 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -61,7 +61,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
61 const auto [entry, is_new] = used_cbufs.try_emplace(index); 61 const auto [entry, is_new] = used_cbufs.try_emplace(index);
62 entry->second.MarkAsUsedIndirect(); 62 entry->second.MarkAsUsedIndirect();
63 63
64 const Node final_offset = [&]() { 64 Node final_offset = [&] {
65 // Attempt to inline constant buffer without a variable offset. This is done to allow 65 // Attempt to inline constant buffer without a variable offset. This is done to allow
66 // tracking LDC calls. 66 // tracking LDC calls.
67 if (const auto gpr = std::get_if<GprNode>(&*node)) { 67 if (const auto gpr = std::get_if<GprNode>(&*node)) {
@@ -69,9 +69,9 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
69 return Immediate(offset); 69 return Immediate(offset);
70 } 70 }
71 } 71 }
72 return Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); 72 return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
73 }(); 73 }();
74 return MakeNode<CbufNode>(index, final_offset); 74 return MakeNode<CbufNode>(index, std::move(final_offset));
75} 75}
76 76
77Node ShaderIR::GetPredicate(u64 pred_, bool negated) { 77Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
@@ -89,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) {
89 89
90Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { 90Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
91 used_input_attributes.emplace(index); 91 used_input_attributes.emplace(index);
92 return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); 92 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
93} 93}
94 94
95Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { 95Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
@@ -122,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
122 } 122 }
123 used_output_attributes.insert(index); 123 used_output_attributes.insert(index);
124 124
125 return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); 125 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
126} 126}
127 127
128Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { 128Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
@@ -134,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
134} 134}
135 135
136Node ShaderIR::GetLocalMemory(Node address) { 136Node ShaderIR::GetLocalMemory(Node address) {
137 return MakeNode<LmemNode>(address); 137 return MakeNode<LmemNode>(std::move(address));
138} 138}
139 139
140Node ShaderIR::GetTemporal(u32 id) { 140Node ShaderIR::GetTemporary(u32 id) {
141 return GetRegister(Register::ZeroIndex + 1 + id); 141 return GetRegister(Register::ZeroIndex + 1 + id);
142} 142}
143 143
144Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { 144Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
145 if (absolute) { 145 if (absolute) {
146 value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); 146 value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
147 } 147 }
148 if (negate) { 148 if (negate) {
149 value = Operation(OperationCode::FNegate, NO_PRECISE, value); 149 value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
150 } 150 }
151 return value; 151 return value;
152} 152}
@@ -155,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
155 if (!saturate) { 155 if (!saturate) {
156 return value; 156 return value;
157 } 157 }
158 const Node positive_zero = Immediate(std::copysignf(0, 1)); 158
159 const Node positive_one = Immediate(1.0f); 159 Node positive_zero = Immediate(std::copysignf(0, 1));
160 return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); 160 Node positive_one = Immediate(1.0f);
161 return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
162 std::move(positive_one));
161} 163}
162 164
163Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { 165Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
164 switch (size) { 166 switch (size) {
165 case Register::Size::Byte: 167 case Register::Size::Byte:
166 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, 168 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
167 Immediate(24)); 169 std::move(value), Immediate(24));
168 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, 170 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
169 Immediate(24)); 171 std::move(value), Immediate(24));
170 return value; 172 return value;
171 case Register::Size::Short: 173 case Register::Size::Short:
172 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, 174 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
173 Immediate(16)); 175 std::move(value), Immediate(16));
174 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, 176 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
175 Immediate(16)); 177 std::move(value), Immediate(16));
176 case Register::Size::Word: 178 case Register::Size::Word:
177 // Default - do nothing 179 // Default - do nothing
178 return value; 180 return value;
@@ -188,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b
188 return value; 190 return value;
189 } 191 }
190 if (absolute) { 192 if (absolute) {
191 value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); 193 value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
192 } 194 }
193 if (negate) { 195 if (negate) {
194 value = Operation(OperationCode::INegate, NO_PRECISE, value); 196 value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
195 } 197 }
196 return value; 198 return value;
197} 199}
198 200
199Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { 201Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
200 const Node value = Immediate(instr.half_imm.PackImmediates()); 202 Node value = Immediate(instr.half_imm.PackImmediates());
201 if (!has_negation) { 203 if (!has_negation) {
202 return value; 204 return value;
203 } 205 }
204 const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
205 const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
206 206
207 return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); 207 Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
208 Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
209
210 return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
211 std::move(second_negate));
208} 212}
209 213
210Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { 214Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
211 return Operation(OperationCode::HUnpack, type, value); 215 return Operation(OperationCode::HUnpack, type, std::move(value));
212} 216}
213 217
214Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { 218Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
@@ -216,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
216 case Tegra::Shader::HalfMerge::H0_H1: 220 case Tegra::Shader::HalfMerge::H0_H1:
217 return src; 221 return src;
218 case Tegra::Shader::HalfMerge::F32: 222 case Tegra::Shader::HalfMerge::F32:
219 return Operation(OperationCode::HMergeF32, src); 223 return Operation(OperationCode::HMergeF32, std::move(src));
220 case Tegra::Shader::HalfMerge::Mrg_H0: 224 case Tegra::Shader::HalfMerge::Mrg_H0:
221 return Operation(OperationCode::HMergeH0, dest, src); 225 return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
222 case Tegra::Shader::HalfMerge::Mrg_H1: 226 case Tegra::Shader::HalfMerge::Mrg_H1:
223 return Operation(OperationCode::HMergeH1, dest, src); 227 return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
224 } 228 }
225 UNREACHABLE(); 229 UNREACHABLE();
226 return src; 230 return src;
@@ -228,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
228 232
229Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { 233Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
230 if (absolute) { 234 if (absolute) {
231 value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); 235 value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
232 } 236 }
233 if (negate) { 237 if (negate) {
234 value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), 238 value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
235 GetPredicate(true)); 239 GetPredicate(true));
236 } 240 }
237 return value; 241 return value;
@@ -241,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
241 if (!saturate) { 245 if (!saturate) {
242 return value; 246 return value;
243 } 247 }
244 const Node positive_zero = Immediate(std::copysignf(0, 1)); 248
245 const Node positive_one = Immediate(1.0f); 249 Node positive_zero = Immediate(std::copysignf(0, 1));
246 return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); 250 Node positive_one = Immediate(1.0f);
251 return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
252 std::move(positive_one));
247} 253}
248 254
249Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { 255Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
@@ -271,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
271 condition == PredCondition::LessEqualWithNan || 277 condition == PredCondition::LessEqualWithNan ||
272 condition == PredCondition::GreaterThanWithNan || 278 condition == PredCondition::GreaterThanWithNan ||
273 condition == PredCondition::GreaterEqualWithNan) { 279 condition == PredCondition::GreaterEqualWithNan) {
274
275 predicate = Operation(OperationCode::LogicalOr, predicate, 280 predicate = Operation(OperationCode::LogicalOr, predicate,
276 Operation(OperationCode::LogicalFIsNan, op_a)); 281 Operation(OperationCode::LogicalFIsNan, op_a));
277 predicate = Operation(OperationCode::LogicalOr, predicate, 282 predicate = Operation(OperationCode::LogicalOr, predicate,
@@ -300,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
300 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 305 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
301 "Unknown predicate comparison operation"); 306 "Unknown predicate comparison operation");
302 307
303 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b); 308 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
309 std::move(op_b));
304 310
305 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || 311 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
306 condition == PredCondition::NotEqualWithNan || 312 condition == PredCondition::NotEqualWithNan ||
@@ -330,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition
330 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 336 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
331 "Unknown predicate comparison operation"); 337 "Unknown predicate comparison operation");
332 338
333 const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); 339 return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
334
335 return predicate;
336} 340}
337 341
338OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { 342OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
@@ -358,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
358} 362}
359 363
360void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { 364void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
361 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); 365 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
362} 366}
363 367
364void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { 368void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
365 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); 369 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
366} 370}
367 371
368void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { 372void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
369 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); 373 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
370} 374}
371 375
372void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { 376void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
373 bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); 377 bb.push_back(
378 Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
374} 379}
375 380
376void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { 381void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
377 SetRegister(bb, Register::ZeroIndex + 1 + id, value); 382 SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
378} 383}
379 384
380void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { 385void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
381 if (!sets_cc) { 386 if (!sets_cc) {
382 return; 387 return;
383 } 388 }
384 const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); 389 Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f));
385 SetInternalFlag(bb, InternalFlag::Zero, zerop); 390 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
386 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 391 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
387} 392}
388 393
@@ -390,14 +395,14 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_
390 if (!sets_cc) { 395 if (!sets_cc) {
391 return; 396 return;
392 } 397 }
393 const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); 398 Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
394 SetInternalFlag(bb, InternalFlag::Zero, zerop); 399 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
395 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 400 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
396} 401}
397 402
398Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { 403Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
399 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), 404 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
400 Immediate(bits)); 405 Immediate(offset), Immediate(bits));
401} 406}
402 407
403} // namespace VideoCommon::Shader 408} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 126c78136..59a083d90 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,13 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cstring>
9#include <map> 8#include <map>
10#include <optional> 9#include <optional>
11#include <set> 10#include <set>
12#include <string>
13#include <tuple> 11#include <tuple>
14#include <variant>
15#include <vector> 12#include <vector>
16 13
17#include "common/common_types.h" 14#include "common/common_types.h"
@@ -210,8 +207,8 @@ private:
210 Node GetInternalFlag(InternalFlag flag, bool negated = false); 207 Node GetInternalFlag(InternalFlag flag, bool negated = false);
211 /// Generates a node representing a local memory address 208 /// Generates a node representing a local memory address
212 Node GetLocalMemory(Node address); 209 Node GetLocalMemory(Node address);
213 /// Generates a temporal, internally it uses a post-RZ register 210 /// Generates a temporary, internally it uses a post-RZ register
214 Node GetTemporal(u32 id); 211 Node GetTemporary(u32 id);
215 212
216 /// Sets a register. src value must be a number-evaluated node. 213 /// Sets a register. src value must be a number-evaluated node.
217 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); 214 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
@@ -221,8 +218,8 @@ private:
221 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); 218 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
222 /// Sets a local memory address. address and value must be a number-evaluated node 219 /// Sets a local memory address. address and value must be a number-evaluated node
223 void SetLocalMemory(NodeBlock& bb, Node address, Node value); 220 void SetLocalMemory(NodeBlock& bb, Node address, Node value);
224 /// Sets a temporal. Internally it uses a post-RZ register 221 /// Sets a temporary. Internally it uses a post-RZ register
225 void SetTemporal(NodeBlock& bb, u32 id, Node value); 222 void SetTemporary(NodeBlock& bb, u32 id, Node value);
226 223
227 /// Sets internal flags from a float 224 /// Sets internal flags from a float
228 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); 225 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index dc132a4a3..a53e02253 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -15,18 +15,20 @@ namespace {
15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, 15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
16 OperationCode operation_code) { 16 OperationCode operation_code) {
17 for (; cursor >= 0; --cursor) { 17 for (; cursor >= 0; --cursor) {
18 const Node node = code.at(cursor); 18 Node node = code.at(cursor);
19
19 if (const auto operation = std::get_if<OperationNode>(&*node)) { 20 if (const auto operation = std::get_if<OperationNode>(&*node)) {
20 if (operation->GetCode() == operation_code) { 21 if (operation->GetCode() == operation_code) {
21 return {node, cursor}; 22 return {std::move(node), cursor};
22 } 23 }
23 } 24 }
25
24 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { 26 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
25 const auto& conditional_code = conditional->GetCode(); 27 const auto& conditional_code = conditional->GetCode();
26 const auto [found, internal_cursor] = FindOperation( 28 auto [found, internal_cursor] = FindOperation(
27 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); 29 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
28 if (found) { 30 if (found) {
29 return {found, cursor}; 31 return {std::move(found), cursor};
30 } 32 }
31 } 33 }
32 } 34 }
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 7f9623c62..a3a3770a7 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -116,10 +116,10 @@ public:
116 std::lock_guard lock{mutex}; 116 std::lock_guard lock{mutex};
117 auto& maxwell3d = system.GPU().Maxwell3D(); 117 auto& maxwell3d = system.GPU().Maxwell3D();
118 118
119 if (!maxwell3d.dirty_flags.zeta_buffer) { 119 if (!maxwell3d.dirty.depth_buffer) {
120 return depth_buffer.view; 120 return depth_buffer.view;
121 } 121 }
122 maxwell3d.dirty_flags.zeta_buffer = false; 122 maxwell3d.dirty.depth_buffer = false;
123 123
124 const auto& regs{maxwell3d.regs}; 124 const auto& regs{maxwell3d.regs};
125 const auto gpu_addr{regs.zeta.Address()}; 125 const auto gpu_addr{regs.zeta.Address()};
@@ -145,10 +145,10 @@ public:
145 std::lock_guard lock{mutex}; 145 std::lock_guard lock{mutex};
146 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 146 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
147 auto& maxwell3d = system.GPU().Maxwell3D(); 147 auto& maxwell3d = system.GPU().Maxwell3D();
148 if (!maxwell3d.dirty_flags.color_buffer[index]) { 148 if (!maxwell3d.dirty.render_target[index]) {
149 return render_targets[index].view; 149 return render_targets[index].view;
150 } 150 }
151 maxwell3d.dirty_flags.color_buffer.reset(index); 151 maxwell3d.dirty.render_target[index] = false;
152 152
153 const auto& regs{maxwell3d.regs}; 153 const auto& regs{maxwell3d.regs};
154 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || 154 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
@@ -274,10 +274,11 @@ protected:
274 auto& maxwell3d = system.GPU().Maxwell3D(); 274 auto& maxwell3d = system.GPU().Maxwell3D();
275 const u32 index = surface->GetRenderTarget(); 275 const u32 index = surface->GetRenderTarget();
276 if (index == DEPTH_RT) { 276 if (index == DEPTH_RT) {
277 maxwell3d.dirty_flags.zeta_buffer = true; 277 maxwell3d.dirty.depth_buffer = true;
278 } else { 278 } else {
279 maxwell3d.dirty_flags.color_buffer.set(index, true); 279 maxwell3d.dirty.render_target[index] = true;
280 } 280 }
281 maxwell3d.dirty.render_settings = true;
281 } 282 }
282 283
283 void Register(TSurface surface) { 284 void Register(TSurface surface) {