summaryrefslogtreecommitdiff
path: root/src/video_core/engines
diff options
context:
space:
mode:
author: bunnei, 2019-07-21 00:59:52 -0400
committer: GitHub, 2019-07-21 00:59:52 -0400
commit: 27e10e0442dfd347387c6eaf148b27f5cc38bcaf (patch)
tree: c078fc3f0e62e55fc92a0c8b582666deece0a968 /src/video_core/engines
parent: Update README.md (diff)
parent: Maxwell3D: Reorganize and address feedback (diff)
download: yuzu-27e10e0442dfd347387c6eaf148b27f5cc38bcaf.tar.gz
download: yuzu-27e10e0442dfd347387c6eaf148b27f5cc38bcaf.tar.xz
download: yuzu-27e10e0442dfd347387c6eaf148b27f5cc38bcaf.zip
Merge pull request #2735 from FernandoS27/pipeline-rework
Rework Dirty Flags in GPU Pipeline, Optimize CBData and Redo Clearing mechanism
Diffstat (limited to 'src/video_core/engines'):
-rw-r--r-- src/video_core/engines/kepler_compute.cpp | 2
-rw-r--r-- src/video_core/engines/kepler_memory.cpp | 2
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 261
-rw-r--r-- src/video_core/engines/maxwell_3d.h | 89
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp | 2
5 files changed, 287 insertions, 69 deletions
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 7404a8163..e3d5fb8a9 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
37 const bool is_last_call = method_call.IsLastCall(); 37 const bool is_last_call = method_call.IsLastCall();
38 upload_state.ProcessData(method_call.argument, is_last_call); 38 upload_state.ProcessData(method_call.argument, is_last_call);
39 if (is_last_call) { 39 if (is_last_call) {
40 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 40 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
41 } 41 }
42 break; 42 break;
43 } 43 }
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 0561f676c..44279de00 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
34 const bool is_last_call = method_call.IsLastCall(); 34 const bool is_last_call = method_call.IsLastCall();
35 upload_state.ProcessData(method_call.argument, is_last_call); 35 upload_state.ProcessData(method_call.argument, is_last_call);
36 if (is_last_call) { 36 if (is_last_call) {
37 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 37 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
38 } 38 }
39 break; 39 break;
40 } 40 }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 8755b8af4..fe9fc0278 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
22 MemoryManager& memory_manager) 22 MemoryManager& memory_manager)
23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, 23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { 24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
25 InitDirtySettings();
25 InitializeRegisterDefaults(); 26 InitializeRegisterDefaults();
26} 27}
27 28
@@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
69 regs.stencil_back_func_mask = 0xFFFFFFFF; 70 regs.stencil_back_func_mask = 0xFFFFFFFF;
70 regs.stencil_back_mask = 0xFFFFFFFF; 71 regs.stencil_back_mask = 0xFFFFFFFF;
71 72
73 regs.depth_test_func = Regs::ComparisonOp::Always;
74 regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
75 regs.cull.cull_face = Regs::Cull::CullFace::Back;
76
72 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a 77 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
73 // register carrying a default value. Assume it's OpenGL's default (1). 78 // register carrying a default value. Assume it's OpenGL's default (1).
74 regs.point_size = 1.0f; 79 regs.point_size = 1.0f;
@@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() {
86 regs.rt_separate_frag_data = 1; 91 regs.rt_separate_frag_data = 1;
87} 92}
88 93
94#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
95
96void Maxwell3D::InitDirtySettings() {
97 const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
98 const auto start_itr = dirty_pointers.begin() + start;
99 const auto end_itr = start_itr + range;
100 std::fill(start_itr, end_itr, position);
101 };
102 dirty.regs.fill(true);
103
104 // Init Render Targets
105 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
106 constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
107 constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
108 u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
109 for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
110 set_block(rt_reg, registers_per_rt, rt_dirty_reg);
111 rt_dirty_reg++;
112 }
113 constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
114 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
115 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
116 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
117 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
118 constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
119 set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
120
121 // Init Vertex Arrays
122 constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
123 constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
124 constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
125 u32 va_reg = DIRTY_REGS_POS(vertex_array);
126 u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
127 for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
128 vertex_reg += vertex_array_size) {
129 set_block(vertex_reg, 3, va_reg);
130 // The divisor concerns vertex array instances
131 dirty_pointers[vertex_reg + 3] = vi_reg;
132 va_reg++;
133 vi_reg++;
134 }
135 constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
136 constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
137 constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
138 va_reg = DIRTY_REGS_POS(vertex_array);
139 for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
140 vertex_reg += vertex_limit_size) {
141 set_block(vertex_reg, vertex_limit_size, va_reg);
142 va_reg++;
143 }
144 constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
145 constexpr u32 vertex_instance_size =
146 sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
147 constexpr u32 vertex_instance_end =
148 vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
149 vi_reg = DIRTY_REGS_POS(vertex_instance);
150 for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
151 vertex_reg += vertex_instance_size) {
152 set_block(vertex_reg, vertex_instance_size, vi_reg);
153 vi_reg++;
154 }
155 set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
156 DIRTY_REGS_POS(vertex_attrib_format));
157
158 // Init Shaders
159 constexpr u32 shader_registers_count =
160 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
161 set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
162 DIRTY_REGS_POS(shaders));
163
164 // State
165
166 // Viewport
167 constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
168 constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
169 constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
170 set_block(viewport_start, viewport_size, viewport_dirty_reg);
171 constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
172 constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
173 set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
174
175 // Viewport transformation
176 constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
177 constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
178 set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
179
180 // Cullmode
181 constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
182 constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
183 set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
184
185 // Screen y control
186 dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
187
188 // Primitive Restart
189 constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
190 constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
191 set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
192
193 // Depth Test
194 constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
195 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
196 dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
197 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
198
199 // Stencil Test
200 constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
201 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
202 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
203 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
204 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
205 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
206 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
207 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
208 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
209 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
210 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
211 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
212 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
213 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
214 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
215 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
216 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
217
218 // Color Mask
219 constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
220 dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
221 set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
222 color_mask_dirty_reg);
223 // Blend State
224 constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
225 set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
226 blend_state_dirty_reg);
227 dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
228 set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
229 set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
230 blend_state_dirty_reg);
231
232 // Scissor State
233 constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
234 set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
235 scissor_test_dirty_reg);
236
237 // Polygon Offset
238 constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
239 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
240 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
241 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
242 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
243 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
244 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
245}
246
89void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { 247void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
90 // Reset the current macro. 248 // Reset the current macro.
91 executing_macro = 0; 249 executing_macro = 0;
@@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
108 266
109 const u32 method = method_call.method; 267 const u32 method = method_call.method;
110 268
269 if (method == cb_data_state.current) {
270 regs.reg_array[method] = method_call.argument;
271 ProcessCBData(method_call.argument);
272 return;
273 } else if (cb_data_state.current != null_cb_data) {
274 FinishCBData();
275 }
276
111 // It is an error to write to a register other than the current macro's ARG register before it 277 // It is an error to write to a register other than the current macro's ARG register before it
112 // has finished execution. 278 // has finished execution.
113 if (executing_macro != 0) { 279 if (executing_macro != 0) {
@@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
143 309
144 if (regs.reg_array[method] != method_call.argument) { 310 if (regs.reg_array[method] != method_call.argument) {
145 regs.reg_array[method] = method_call.argument; 311 regs.reg_array[method] = method_call.argument;
146 // Color buffers 312 const std::size_t dirty_reg = dirty_pointers[method];
147 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); 313 if (dirty_reg) {
148 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 314 dirty.regs[dirty_reg] = true;
149 if (method >= first_rt_reg && 315 if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
150 method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { 316 dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
151 const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; 317 dirty.vertex_array_buffers = true;
152 dirty_flags.color_buffer.set(rt_index); 318 } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
153 } 319 dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
154 320 dirty.vertex_instances = true;
155 // Zeta buffer 321 } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
156 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); 322 dirty_reg < DIRTY_REGS_POS(render_settings)) {
157 if (method == MAXWELL3D_REG_INDEX(zeta_enable) || 323 dirty.render_settings = true;
158 method == MAXWELL3D_REG_INDEX(zeta_width) || 324 }
159 method == MAXWELL3D_REG_INDEX(zeta_height) ||
160 (method >= MAXWELL3D_REG_INDEX(zeta) &&
161 method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
162 dirty_flags.zeta_buffer = true;
163 }
164
165 // Shader
166 constexpr u32 shader_registers_count =
167 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
168 if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
169 method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
170 dirty_flags.shaders = true;
171 }
172
173 // Vertex format
174 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
175 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
176 dirty_flags.vertex_attrib_format = true;
177 }
178
179 // Vertex buffer
180 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
181 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
182 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
183 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
184 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
185 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
186 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
187 method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
188 dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
189 } 325 }
190 } 326 }
191 327
@@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
214 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): 350 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
215 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): 351 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
216 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { 352 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
217 ProcessCBData(method_call.argument); 353 StartCBData(method);
218 break; 354 break;
219 } 355 }
220 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { 356 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
@@ -261,7 +397,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
261 const bool is_last_call = method_call.IsLastCall(); 397 const bool is_last_call = method_call.IsLastCall();
262 upload_state.ProcessData(method_call.argument, is_last_call); 398 upload_state.ProcessData(method_call.argument, is_last_call);
263 if (is_last_call) { 399 if (is_last_call) {
264 dirty_flags.OnMemoryWrite(); 400 dirty.OnMemoryWrite();
265 } 401 }
266 break; 402 break;
267 } 403 }
@@ -333,7 +469,6 @@ void Maxwell3D::ProcessQueryGet() {
333 query_result.timestamp = system.CoreTiming().GetTicks(); 469 query_result.timestamp = system.CoreTiming().GetTicks();
334 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); 470 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
335 } 471 }
336 dirty_flags.OnMemoryWrite();
337 break; 472 break;
338 } 473 }
339 default: 474 default:
@@ -405,23 +540,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
405} 540}
406 541
407void Maxwell3D::ProcessCBData(u32 value) { 542void Maxwell3D::ProcessCBData(u32 value) {
543 const u32 id = cb_data_state.id;
544 cb_data_state.buffer[id][cb_data_state.counter] = value;
545 // Increment the current buffer position.
546 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
547 cb_data_state.counter++;
548}
549
550void Maxwell3D::StartCBData(u32 method) {
551 constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
552 cb_data_state.start_pos = regs.const_buffer.cb_pos;
553 cb_data_state.id = method - first_cb_data;
554 cb_data_state.current = method;
555 cb_data_state.counter = 0;
556 ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
557}
558
559void Maxwell3D::FinishCBData() {
408 // Write the input value to the current const buffer at the current position. 560 // Write the input value to the current const buffer at the current position.
409 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); 561 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
410 ASSERT(buffer_address != 0); 562 ASSERT(buffer_address != 0);
411 563
412 // Don't allow writing past the end of the buffer. 564 // Don't allow writing past the end of the buffer.
413 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); 565 ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
414 566
415 const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; 567 const GPUVAddr address{buffer_address + cb_data_state.start_pos};
568 const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
416 569
417 u8* ptr{memory_manager.GetPointer(address)}; 570 const u32 id = cb_data_state.id;
418 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); 571 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
419 memory_manager.Write<u32>(address, value); 572 dirty.OnMemoryWrite();
420 573
421 dirty_flags.OnMemoryWrite(); 574 cb_data_state.id = null_cb_data;
422 575 cb_data_state.current = null_cb_data;
423 // Increment the current buffer position.
424 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
425} 576}
426 577
427Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 578Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 8d15c8a48..ac300bf76 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1124,23 +1124,77 @@ public:
1124 1124
1125 State state{}; 1125 State state{};
1126 1126
// Dirty-state tracking for the 3D engine.
// The named flags and the `regs` array share storage through the union, so a flag can be reached
// either by name or by its byte offset inside this struct (InitDirtySettings stores such offsets
// in `dirty_pointers`, and CallMethod raises `regs[offset]`). The member order therefore defines
// the offsets and must not be changed casually.
1127 struct DirtyFlags { 1127 struct DirtyRegs {
1128 std::bitset<8> color_buffer{0xFF}; 1128 static constexpr std::size_t NUM_REGS = 256;
1129 std::bitset<32> vertex_array{0xFFFFFFFF}; 1129 union {
1130 struct {
// Offset 0: registers without a tracked flag map here, and CallMethod ignores offset 0.
1131 bool null_dirty;
1132
1133 // Vertex Attributes
1134 bool vertex_attrib_format;
1135
1136 // Vertex Arrays
1137 std::array<bool, 32> vertex_array;
1138
// Summary flag: CallMethod raises it whenever one of the vertex_array flags is raised.
1139 bool vertex_array_buffers;
1140
1141 // Vertex Instances
1142 std::array<bool, 32> vertex_instance;
1143
// Summary flag: CallMethod raises it whenever one of the vertex_instance flags is raised.
1144 bool vertex_instances;
1145
1146 // Render Targets
1147 std::array<bool, 8> render_target;
1148 bool depth_buffer;
1149
// Summary flag: CallMethod raises it whenever a render_target flag or depth_buffer is raised.
1150 bool render_settings;
1151
1152 // Shaders
1153 bool shaders;
1154
1155 // Rasterizer State
1156 bool viewport;
1157 bool clip_coefficient;
1158 bool cull_mode;
1159 bool primitive_restart;
1160 bool depth_test;
1161 bool stencil_test;
1162 bool blend_state;
1163 bool scissor_test;
1164 bool transform_feedback;
1165 bool color_mask;
1166 bool polygon_offset;
1130 1167
1131 bool vertex_attrib_format = true; 1168 // Complementary
1132 bool zeta_buffer = true; 1169 bool viewport_transform;
1133 bool shaders = true; 1170 bool screen_y_control;
1171
// Raised by OnMemoryWrite().
1172 bool memory_general;
1173 };
// Byte-wise view of the same flags, indexed by the offsets stored in dirty_pointers.
1174 std::array<bool, NUM_REGS> regs;
1175 };
1176
// Raises every per-array vertex flag together with the vertex array summary flag.
1177 void ResetVertexArrays() {
1178 vertex_array.fill(true);
1179 vertex_array_buffers = true;
1180 }
1181
// Raises the depth buffer flag, every render target flag and the render target summary flag.
1182 void ResetRenderTargets() {
1183 depth_buffer = true;
1184 render_target.fill(true);
1185 render_settings = true;
1186 }
1134 1187
// Called after an engine writes to memory: raises the shader, general memory, render target and
// vertex array flags.
1135 void OnMemoryWrite() { 1188 void OnMemoryWrite() {
1136 zeta_buffer = true;
1137 shaders = true; 1189 shaders = true;
1138 color_buffer.set(); 1190 memory_general = true;
1139 vertex_array.set(); 1191 ResetRenderTargets();
1192 ResetVertexArrays();
1140 } 1193 }
1141 };
1142 1194
1143 DirtyFlags dirty_flags; 1195 } dirty{};
1196
1197 std::array<u8, Regs::NUM_REGS> dirty_pointers{};
1144 1198
1145 /// Reads a register value located at the input method address 1199 /// Reads a register value located at the input method address
1146 u32 GetRegisterValue(u32 method) const; 1200 u32 GetRegisterValue(u32 method) const;
@@ -1192,6 +1246,15 @@ private:
1192 /// Interpreter for the macro codes uploaded to the GPU. 1246 /// Interpreter for the macro codes uploaded to the GPU.
1193 MacroInterpreter macro_interpreter; 1247 MacroInterpreter macro_interpreter;
1194 1248
// Sentinel stored in `current` and `id` while no CB_DATA run is being staged.
1249 static constexpr u32 null_cb_data = 0xFFFFFFFF;
// Staging state for batched CB_DATA writes: StartCBData() begins a run, ProcessCBData() appends
// one word per call, and FinishCBData() flushes the run with a single block write.
1250 struct {
// One staging array per CB_DATA register, indexed as buffer[id][counter].
1251 std::array<std::array<u32, 0x4000>, 16> buffer;
// Register index of the CB_DATA method being staged, or null_cb_data when idle.
1252 u32 current{null_cb_data};
// Index of that register relative to cb_data[0], or null_cb_data when idle.
1253 u32 id{null_cb_data};
// Value of const_buffer.cb_pos when the run started.
1254 u32 start_pos{};
// Number of words staged so far in the current run.
1255 u32 counter{};
1256 } cb_data_state;
1257
1195 Upload::State upload_state; 1258 Upload::State upload_state;
1196 1259
1197 /// Retrieves information about a specific TIC entry from the TIC buffer. 1260 /// Retrieves information about a specific TIC entry from the TIC buffer.
@@ -1200,6 +1263,8 @@ private:
1200 /// Retrieves information about a specific TSC entry from the TSC buffer. 1263 /// Retrieves information about a specific TSC entry from the TSC buffer.
1201 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; 1264 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
1202 1265
1266 void InitDirtySettings();
1267
1203 /** 1268 /**
1204 * Call a macro on this engine. 1269 * Call a macro on this engine.
1205 * @param method Method to call 1270 * @param method Method to call
@@ -1223,7 +1288,9 @@ private:
1223 void ProcessSyncPoint(); 1288 void ProcessSyncPoint();
1224 1289
1225 /// Handles a write to the CB_DATA[i] register. 1290 /// Handles a write to the CB_DATA[i] register.
1291 void StartCBData(u32 method);
1226 void ProcessCBData(u32 value); 1292 void ProcessCBData(u32 value);
1293 void FinishCBData();
1227 1294
1228 /// Handles a write to the CB_BIND register. 1295 /// Handles a write to the CB_BIND register.
1229 void ProcessCBBind(Regs::ShaderStage stage); 1296 void ProcessCBBind(Regs::ShaderStage stage);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index afb9578d0..b5f57e534 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() {
58 } 58 }
59 59
60 // All copies here update the main memory, so mark all rasterizer states as invalid. 60 // All copies here update the main memory, so mark all rasterizer states as invalid.
61 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 61 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
62 62
63 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 63 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
64 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 64 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D