diff options
| author | 2019-07-21 00:59:52 -0400 | |
|---|---|---|
| committer | 2019-07-21 00:59:52 -0400 | |
| commit | 27e10e0442dfd347387c6eaf148b27f5cc38bcaf (patch) | |
| tree | c078fc3f0e62e55fc92a0c8b582666deece0a968 /src/video_core/engines | |
| parent | Update README.md (diff) | |
| parent | Maxwell3D: Reorganize and address feedback (diff) | |
| download | yuzu-27e10e0442dfd347387c6eaf148b27f5cc38bcaf.tar.gz yuzu-27e10e0442dfd347387c6eaf148b27f5cc38bcaf.tar.xz yuzu-27e10e0442dfd347387c6eaf148b27f5cc38bcaf.zip | |
Merge pull request #2735 from FernandoS27/pipeline-rework
Rework Dirty Flags in GPU Pipeline, Optimize CBData and Redo Clearing mechanism
Diffstat (limited to 'src/video_core/engines')
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 261 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 89 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 2 |
5 files changed, 287 insertions, 69 deletions
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7404a8163..e3d5fb8a9 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 37 | const bool is_last_call = method_call.IsLastCall(); | 37 | const bool is_last_call = method_call.IsLastCall(); |
| 38 | upload_state.ProcessData(method_call.argument, is_last_call); | 38 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 39 | if (is_last_call) { | 39 | if (is_last_call) { |
| 40 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 40 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 41 | } | 41 | } |
| 42 | break; | 42 | break; |
| 43 | } | 43 | } |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 0561f676c..44279de00 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 34 | const bool is_last_call = method_call.IsLastCall(); | 34 | const bool is_last_call = method_call.IsLastCall(); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | 35 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 36 | if (is_last_call) { | 36 | if (is_last_call) { |
| 37 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 37 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 38 | } | 38 | } |
| 39 | break; | 39 | break; |
| 40 | } | 40 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 8755b8af4..fe9fc0278 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste | |||
| 22 | MemoryManager& memory_manager) | 22 | MemoryManager& memory_manager) |
| 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, | 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, |
| 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { | 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { |
| 25 | InitDirtySettings(); | ||
| 25 | InitializeRegisterDefaults(); | 26 | InitializeRegisterDefaults(); |
| 26 | } | 27 | } |
| 27 | 28 | ||
| @@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 69 | regs.stencil_back_func_mask = 0xFFFFFFFF; | 70 | regs.stencil_back_func_mask = 0xFFFFFFFF; |
| 70 | regs.stencil_back_mask = 0xFFFFFFFF; | 71 | regs.stencil_back_mask = 0xFFFFFFFF; |
| 71 | 72 | ||
| 73 | regs.depth_test_func = Regs::ComparisonOp::Always; | ||
| 74 | regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; | ||
| 75 | regs.cull.cull_face = Regs::Cull::CullFace::Back; | ||
| 76 | |||
| 72 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a | 77 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a |
| 73 | // register carrying a default value. Assume it's OpenGL's default (1). | 78 | // register carrying a default value. Assume it's OpenGL's default (1). |
| 74 | regs.point_size = 1.0f; | 79 | regs.point_size = 1.0f; |
| @@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 86 | regs.rt_separate_frag_data = 1; | 91 | regs.rt_separate_frag_data = 1; |
| 87 | } | 92 | } |
| 88 | 93 | ||
| 94 | #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) | ||
| 95 | |||
| 96 | void Maxwell3D::InitDirtySettings() { | ||
| 97 | const auto set_block = [this](const u32 start, const u32 range, const u8 position) { | ||
| 98 | const auto start_itr = dirty_pointers.begin() + start; | ||
| 99 | const auto end_itr = start_itr + range; | ||
| 100 | std::fill(start_itr, end_itr, position); | ||
| 101 | }; | ||
| 102 | dirty.regs.fill(true); | ||
| 103 | |||
| 104 | // Init Render Targets | ||
| 105 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | ||
| 106 | constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); | ||
| 107 | constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; | ||
| 108 | u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); | ||
| 109 | for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { | ||
| 110 | set_block(rt_reg, registers_per_rt, rt_dirty_reg); | ||
| 111 | rt_dirty_reg++; | ||
| 112 | } | ||
| 113 | constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); | ||
| 114 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; | ||
| 115 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag; | ||
| 116 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag; | ||
| 117 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | ||
| 118 | constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta); | ||
| 119 | set_block(zeta_reg, registers_in_zeta, depth_buffer_flag); | ||
| 120 | |||
| 121 | // Init Vertex Arrays | ||
| 122 | constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); | ||
| 123 | constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); | ||
| 124 | constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; | ||
| 125 | u32 va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 126 | u32 vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 127 | for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; | ||
| 128 | vertex_reg += vertex_array_size) { | ||
| 129 | set_block(vertex_reg, 3, va_reg); | ||
| 130 | // The divisor concerns vertex array instances | ||
| 131 | dirty_pointers[vertex_reg + 3] = vi_reg; | ||
| 132 | va_reg++; | ||
| 133 | vi_reg++; | ||
| 134 | } | ||
| 135 | constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); | ||
| 136 | constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); | ||
| 137 | constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; | ||
| 138 | va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 139 | for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; | ||
| 140 | vertex_reg += vertex_limit_size) { | ||
| 141 | set_block(vertex_reg, vertex_limit_size, va_reg); | ||
| 142 | va_reg++; | ||
| 143 | } | ||
| 144 | constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); | ||
| 145 | constexpr u32 vertex_instance_size = | ||
| 146 | sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); | ||
| 147 | constexpr u32 vertex_instance_end = | ||
| 148 | vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; | ||
| 149 | vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 150 | for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; | ||
| 151 | vertex_reg += vertex_instance_size) { | ||
| 152 | set_block(vertex_reg, vertex_instance_size, vi_reg); | ||
| 153 | vi_reg++; | ||
| 154 | } | ||
| 155 | set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), | ||
| 156 | DIRTY_REGS_POS(vertex_attrib_format)); | ||
| 157 | |||
| 158 | // Init Shaders | ||
| 159 | constexpr u32 shader_registers_count = | ||
| 160 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 161 | set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, | ||
| 162 | DIRTY_REGS_POS(shaders)); | ||
| 163 | |||
| 164 | // State | ||
| 165 | |||
| 166 | // Viewport | ||
| 167 | constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); | ||
| 168 | constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); | ||
| 169 | constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); | ||
| 170 | set_block(viewport_start, viewport_size, viewport_dirty_reg); | ||
| 171 | constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control); | ||
| 172 | constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32); | ||
| 173 | set_block(view_volume_start, view_volume_size, viewport_dirty_reg); | ||
| 174 | |||
| 175 | // Viewport transformation | ||
| 176 | constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform); | ||
| 177 | constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32); | ||
| 178 | set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform)); | ||
| 179 | |||
| 180 | // Cullmode | ||
| 181 | constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull); | ||
| 182 | constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32); | ||
| 183 | set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode)); | ||
| 184 | |||
| 185 | // Screen y control | ||
| 186 | dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control); | ||
| 187 | |||
| 188 | // Primitive Restart | ||
| 189 | constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart); | ||
| 190 | constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32); | ||
| 191 | set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); | ||
| 192 | |||
| 193 | // Depth Test | ||
| 194 | constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); | ||
| 195 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; | ||
| 196 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; | ||
| 197 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; | ||
| 198 | |||
| 199 | // Stencil Test | ||
| 200 | constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test); | ||
| 201 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg; | ||
| 202 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg; | ||
| 203 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg; | ||
| 204 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg; | ||
| 205 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg; | ||
| 206 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg; | ||
| 207 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg; | ||
| 208 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg; | ||
| 209 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg; | ||
| 210 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg; | ||
| 211 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg; | ||
| 212 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg; | ||
| 213 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg; | ||
| 214 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg; | ||
| 215 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg; | ||
| 216 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; | ||
| 217 | |||
| 218 | // Color Mask | ||
| 219 | constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); | ||
| 220 | dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; | ||
| 221 | set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), | ||
| 222 | color_mask_dirty_reg); | ||
| 223 | // Blend State | ||
| 224 | constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); | ||
| 225 | set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), | ||
| 226 | blend_state_dirty_reg); | ||
| 227 | dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; | ||
| 228 | set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg); | ||
| 229 | set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32), | ||
| 230 | blend_state_dirty_reg); | ||
| 231 | |||
| 232 | // Scissor State | ||
| 233 | constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); | ||
| 234 | set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), | ||
| 235 | scissor_test_dirty_reg); | ||
| 236 | |||
| 237 | // Polygon Offset | ||
| 238 | constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); | ||
| 239 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; | ||
| 240 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; | ||
| 241 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; | ||
| 242 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg; | ||
| 243 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg; | ||
| 244 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; | ||
| 245 | } | ||
| 246 | |||
| 89 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | 247 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { |
| 90 | // Reset the current macro. | 248 | // Reset the current macro. |
| 91 | executing_macro = 0; | 249 | executing_macro = 0; |
| @@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 108 | 266 | ||
| 109 | const u32 method = method_call.method; | 267 | const u32 method = method_call.method; |
| 110 | 268 | ||
| 269 | if (method == cb_data_state.current) { | ||
| 270 | regs.reg_array[method] = method_call.argument; | ||
| 271 | ProcessCBData(method_call.argument); | ||
| 272 | return; | ||
| 273 | } else if (cb_data_state.current != null_cb_data) { | ||
| 274 | FinishCBData(); | ||
| 275 | } | ||
| 276 | |||
| 111 | // It is an error to write to a register other than the current macro's ARG register before it | 277 | // It is an error to write to a register other than the current macro's ARG register before it |
| 112 | // has finished execution. | 278 | // has finished execution. |
| 113 | if (executing_macro != 0) { | 279 | if (executing_macro != 0) { |
| @@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 143 | 309 | ||
| 144 | if (regs.reg_array[method] != method_call.argument) { | 310 | if (regs.reg_array[method] != method_call.argument) { |
| 145 | regs.reg_array[method] = method_call.argument; | 311 | regs.reg_array[method] = method_call.argument; |
| 146 | // Color buffers | 312 | const std::size_t dirty_reg = dirty_pointers[method]; |
| 147 | constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); | 313 | if (dirty_reg) { |
| 148 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | 314 | dirty.regs[dirty_reg] = true; |
| 149 | if (method >= first_rt_reg && | 315 | if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && |
| 150 | method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { | 316 | dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { |
| 151 | const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; | 317 | dirty.vertex_array_buffers = true; |
| 152 | dirty_flags.color_buffer.set(rt_index); | 318 | } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && |
| 153 | } | 319 | dirty_reg < DIRTY_REGS_POS(vertex_instances)) { |
| 154 | 320 | dirty.vertex_instances = true; | |
| 155 | // Zeta buffer | 321 | } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && |
| 156 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | 322 | dirty_reg < DIRTY_REGS_POS(render_settings)) { |
| 157 | if (method == MAXWELL3D_REG_INDEX(zeta_enable) || | 323 | dirty.render_settings = true; |
| 158 | method == MAXWELL3D_REG_INDEX(zeta_width) || | 324 | } |
| 159 | method == MAXWELL3D_REG_INDEX(zeta_height) || | ||
| 160 | (method >= MAXWELL3D_REG_INDEX(zeta) && | ||
| 161 | method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { | ||
| 162 | dirty_flags.zeta_buffer = true; | ||
| 163 | } | ||
| 164 | |||
| 165 | // Shader | ||
| 166 | constexpr u32 shader_registers_count = | ||
| 167 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 168 | if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && | ||
| 169 | method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { | ||
| 170 | dirty_flags.shaders = true; | ||
| 171 | } | ||
| 172 | |||
| 173 | // Vertex format | ||
| 174 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | ||
| 175 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | ||
| 176 | dirty_flags.vertex_attrib_format = true; | ||
| 177 | } | ||
| 178 | |||
| 179 | // Vertex buffer | ||
| 180 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 181 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) { | ||
| 182 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 183 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 184 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) { | ||
| 185 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 186 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 187 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) { | ||
| 188 | dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 189 | } | 325 | } |
| 190 | } | 326 | } |
| 191 | 327 | ||
| @@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 214 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): | 350 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): |
| 215 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): | 351 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): |
| 216 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { | 352 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { |
| 217 | ProcessCBData(method_call.argument); | 353 | StartCBData(method); |
| 218 | break; | 354 | break; |
| 219 | } | 355 | } |
| 220 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { | 356 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { |
| @@ -261,7 +397,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 261 | const bool is_last_call = method_call.IsLastCall(); | 397 | const bool is_last_call = method_call.IsLastCall(); |
| 262 | upload_state.ProcessData(method_call.argument, is_last_call); | 398 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 263 | if (is_last_call) { | 399 | if (is_last_call) { |
| 264 | dirty_flags.OnMemoryWrite(); | 400 | dirty.OnMemoryWrite(); |
| 265 | } | 401 | } |
| 266 | break; | 402 | break; |
| 267 | } | 403 | } |
| @@ -333,7 +469,6 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 333 | query_result.timestamp = system.CoreTiming().GetTicks(); | 469 | query_result.timestamp = system.CoreTiming().GetTicks(); |
| 334 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | 470 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); |
| 335 | } | 471 | } |
| 336 | dirty_flags.OnMemoryWrite(); | ||
| 337 | break; | 472 | break; |
| 338 | } | 473 | } |
| 339 | default: | 474 | default: |
| @@ -405,23 +540,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { | |||
| 405 | } | 540 | } |
| 406 | 541 | ||
| 407 | void Maxwell3D::ProcessCBData(u32 value) { | 542 | void Maxwell3D::ProcessCBData(u32 value) { |
| 543 | const u32 id = cb_data_state.id; | ||
| 544 | cb_data_state.buffer[id][cb_data_state.counter] = value; | ||
| 545 | // Increment the current buffer position. | ||
| 546 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | ||
| 547 | cb_data_state.counter++; | ||
| 548 | } | ||
| 549 | |||
| 550 | void Maxwell3D::StartCBData(u32 method) { | ||
| 551 | constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]); | ||
| 552 | cb_data_state.start_pos = regs.const_buffer.cb_pos; | ||
| 553 | cb_data_state.id = method - first_cb_data; | ||
| 554 | cb_data_state.current = method; | ||
| 555 | cb_data_state.counter = 0; | ||
| 556 | ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); | ||
| 557 | } | ||
| 558 | |||
| 559 | void Maxwell3D::FinishCBData() { | ||
| 408 | // Write the input value to the current const buffer at the current position. | 560 | // Write the input value to the current const buffer at the current position. |
| 409 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); | 561 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); |
| 410 | ASSERT(buffer_address != 0); | 562 | ASSERT(buffer_address != 0); |
| 411 | 563 | ||
| 412 | // Don't allow writing past the end of the buffer. | 564 | // Don't allow writing past the end of the buffer. |
| 413 | ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); | 565 | ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size); |
| 414 | 566 | ||
| 415 | const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; | 567 | const GPUVAddr address{buffer_address + cb_data_state.start_pos}; |
| 568 | const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos; | ||
| 416 | 569 | ||
| 417 | u8* ptr{memory_manager.GetPointer(address)}; | 570 | const u32 id = cb_data_state.id; |
| 418 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | 571 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); |
| 419 | memory_manager.Write<u32>(address, value); | 572 | dirty.OnMemoryWrite(); |
| 420 | 573 | ||
| 421 | dirty_flags.OnMemoryWrite(); | 574 | cb_data_state.id = null_cb_data; |
| 422 | 575 | cb_data_state.current = null_cb_data; | |
| 423 | // Increment the current buffer position. | ||
| 424 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | ||
| 425 | } | 576 | } |
| 426 | 577 | ||
| 427 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 578 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 8d15c8a48..ac300bf76 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1124,23 +1124,77 @@ public: | |||
| 1124 | 1124 | ||
| 1125 | State state{}; | 1125 | State state{}; |
| 1126 | 1126 | ||
| 1127 | struct DirtyFlags { | 1127 | struct DirtyRegs { |
| 1128 | std::bitset<8> color_buffer{0xFF}; | 1128 | static constexpr std::size_t NUM_REGS = 256; |
| 1129 | std::bitset<32> vertex_array{0xFFFFFFFF}; | 1129 | union { |
| 1130 | struct { | ||
| 1131 | bool null_dirty; | ||
| 1132 | |||
| 1133 | // Vertex Attributes | ||
| 1134 | bool vertex_attrib_format; | ||
| 1135 | |||
| 1136 | // Vertex Arrays | ||
| 1137 | std::array<bool, 32> vertex_array; | ||
| 1138 | |||
| 1139 | bool vertex_array_buffers; | ||
| 1140 | |||
| 1141 | // Vertex Instances | ||
| 1142 | std::array<bool, 32> vertex_instance; | ||
| 1143 | |||
| 1144 | bool vertex_instances; | ||
| 1145 | |||
| 1146 | // Render Targets | ||
| 1147 | std::array<bool, 8> render_target; | ||
| 1148 | bool depth_buffer; | ||
| 1149 | |||
| 1150 | bool render_settings; | ||
| 1151 | |||
| 1152 | // Shaders | ||
| 1153 | bool shaders; | ||
| 1154 | |||
| 1155 | // Rasterizer State | ||
| 1156 | bool viewport; | ||
| 1157 | bool clip_coefficient; | ||
| 1158 | bool cull_mode; | ||
| 1159 | bool primitive_restart; | ||
| 1160 | bool depth_test; | ||
| 1161 | bool stencil_test; | ||
| 1162 | bool blend_state; | ||
| 1163 | bool scissor_test; | ||
| 1164 | bool transform_feedback; | ||
| 1165 | bool color_mask; | ||
| 1166 | bool polygon_offset; | ||
| 1130 | 1167 | ||
| 1131 | bool vertex_attrib_format = true; | 1168 | // Complementary |
| 1132 | bool zeta_buffer = true; | 1169 | bool viewport_transform; |
| 1133 | bool shaders = true; | 1170 | bool screen_y_control; |
| 1171 | |||
| 1172 | bool memory_general; | ||
| 1173 | }; | ||
| 1174 | std::array<bool, NUM_REGS> regs; | ||
| 1175 | }; | ||
| 1176 | |||
| 1177 | void ResetVertexArrays() { | ||
| 1178 | vertex_array.fill(true); | ||
| 1179 | vertex_array_buffers = true; | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | void ResetRenderTargets() { | ||
| 1183 | depth_buffer = true; | ||
| 1184 | render_target.fill(true); | ||
| 1185 | render_settings = true; | ||
| 1186 | } | ||
| 1134 | 1187 | ||
| 1135 | void OnMemoryWrite() { | 1188 | void OnMemoryWrite() { |
| 1136 | zeta_buffer = true; | ||
| 1137 | shaders = true; | 1189 | shaders = true; |
| 1138 | color_buffer.set(); | 1190 | memory_general = true; |
| 1139 | vertex_array.set(); | 1191 | ResetRenderTargets(); |
| 1192 | ResetVertexArrays(); | ||
| 1140 | } | 1193 | } |
| 1141 | }; | ||
| 1142 | 1194 | ||
| 1143 | DirtyFlags dirty_flags; | 1195 | } dirty{}; |
| 1196 | |||
| 1197 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1144 | 1198 | ||
| 1145 | /// Reads a register value located at the input method address | 1199 | /// Reads a register value located at the input method address |
| 1146 | u32 GetRegisterValue(u32 method) const; | 1200 | u32 GetRegisterValue(u32 method) const; |
| @@ -1192,6 +1246,15 @@ private: | |||
| 1192 | /// Interpreter for the macro codes uploaded to the GPU. | 1246 | /// Interpreter for the macro codes uploaded to the GPU. |
| 1193 | MacroInterpreter macro_interpreter; | 1247 | MacroInterpreter macro_interpreter; |
| 1194 | 1248 | ||
| 1249 | static constexpr u32 null_cb_data = 0xFFFFFFFF; | ||
| 1250 | struct { | ||
| 1251 | std::array<std::array<u32, 0x4000>, 16> buffer; | ||
| 1252 | u32 current{null_cb_data}; | ||
| 1253 | u32 id{null_cb_data}; | ||
| 1254 | u32 start_pos{}; | ||
| 1255 | u32 counter{}; | ||
| 1256 | } cb_data_state; | ||
| 1257 | |||
| 1195 | Upload::State upload_state; | 1258 | Upload::State upload_state; |
| 1196 | 1259 | ||
| 1197 | /// Retrieves information about a specific TIC entry from the TIC buffer. | 1260 | /// Retrieves information about a specific TIC entry from the TIC buffer. |
| @@ -1200,6 +1263,8 @@ private: | |||
| 1200 | /// Retrieves information about a specific TSC entry from the TSC buffer. | 1263 | /// Retrieves information about a specific TSC entry from the TSC buffer. |
| 1201 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; | 1264 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; |
| 1202 | 1265 | ||
| 1266 | void InitDirtySettings(); | ||
| 1267 | |||
| 1203 | /** | 1268 | /** |
| 1204 | * Call a macro on this engine. | 1269 | * Call a macro on this engine. |
| 1205 | * @param method Method to call | 1270 | * @param method Method to call |
| @@ -1223,7 +1288,9 @@ private: | |||
| 1223 | void ProcessSyncPoint(); | 1288 | void ProcessSyncPoint(); |
| 1224 | 1289 | ||
| 1225 | /// Handles a write to the CB_DATA[i] register. | 1290 | /// Handles a write to the CB_DATA[i] register. |
| 1291 | void StartCBData(u32 method); | ||
| 1226 | void ProcessCBData(u32 value); | 1292 | void ProcessCBData(u32 value); |
| 1293 | void FinishCBData(); | ||
| 1227 | 1294 | ||
| 1228 | /// Handles a write to the CB_BIND register. | 1295 | /// Handles a write to the CB_BIND register. |
| 1229 | void ProcessCBBind(Regs::ShaderStage stage); | 1296 | void ProcessCBBind(Regs::ShaderStage stage); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0..b5f57e534 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. | 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. |
| 61 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 61 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 62 | 62 | ||
| 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |