diff options
| author | 2019-07-21 00:59:52 -0400 | |
|---|---|---|
| committer | 2019-07-21 00:59:52 -0400 | |
| commit | 27e10e0442dfd347387c6eaf148b27f5cc38bcaf (patch) | |
| tree | c078fc3f0e62e55fc92a0c8b582666deece0a968 /src | |
| parent | Update README.md (diff) | |
| parent | Maxwell3D: Reorganize and address feedback (diff) | |
| download | yuzu-27e10e0442dfd347387c6eaf148b27f5cc38bcaf.tar.gz yuzu-27e10e0442dfd347387c6eaf148b27f5cc38bcaf.tar.xz yuzu-27e10e0442dfd347387c6eaf148b27f5cc38bcaf.zip | |
Merge pull request #2735 from FernandoS27/pipeline-rework
Rework Dirty Flags in GPU Pipeline, Optimize CBData and Redo Clearing mechanism
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 261 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 89 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 178 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 41 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 13 |
14 files changed, 528 insertions, 116 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3175579cc..bd036cbe8 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() { | |||
| 22 | MICROPROFILE_SCOPE(DispatchCalls); | 22 | MICROPROFILE_SCOPE(DispatchCalls); |
| 23 | 23 | ||
| 24 | // On entering GPU code, assume all memory may be touched by the ARM core. | 24 | // On entering GPU code, assume all memory may be touched by the ARM core. |
| 25 | gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); | 25 | gpu.Maxwell3D().dirty.OnMemoryWrite(); |
| 26 | 26 | ||
| 27 | dma_pushbuffer_subindex = 0; | 27 | dma_pushbuffer_subindex = 0; |
| 28 | 28 | ||
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7404a8163..e3d5fb8a9 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 37 | const bool is_last_call = method_call.IsLastCall(); | 37 | const bool is_last_call = method_call.IsLastCall(); |
| 38 | upload_state.ProcessData(method_call.argument, is_last_call); | 38 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 39 | if (is_last_call) { | 39 | if (is_last_call) { |
| 40 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 40 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 41 | } | 41 | } |
| 42 | break; | 42 | break; |
| 43 | } | 43 | } |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 0561f676c..44279de00 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 34 | const bool is_last_call = method_call.IsLastCall(); | 34 | const bool is_last_call = method_call.IsLastCall(); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | 35 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 36 | if (is_last_call) { | 36 | if (is_last_call) { |
| 37 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 37 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 38 | } | 38 | } |
| 39 | break; | 39 | break; |
| 40 | } | 40 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 8755b8af4..fe9fc0278 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste | |||
| 22 | MemoryManager& memory_manager) | 22 | MemoryManager& memory_manager) |
| 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, | 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, |
| 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { | 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { |
| 25 | InitDirtySettings(); | ||
| 25 | InitializeRegisterDefaults(); | 26 | InitializeRegisterDefaults(); |
| 26 | } | 27 | } |
| 27 | 28 | ||
| @@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 69 | regs.stencil_back_func_mask = 0xFFFFFFFF; | 70 | regs.stencil_back_func_mask = 0xFFFFFFFF; |
| 70 | regs.stencil_back_mask = 0xFFFFFFFF; | 71 | regs.stencil_back_mask = 0xFFFFFFFF; |
| 71 | 72 | ||
| 73 | regs.depth_test_func = Regs::ComparisonOp::Always; | ||
| 74 | regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; | ||
| 75 | regs.cull.cull_face = Regs::Cull::CullFace::Back; | ||
| 76 | |||
| 72 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a | 77 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a |
| 73 | // register carrying a default value. Assume it's OpenGL's default (1). | 78 | // register carrying a default value. Assume it's OpenGL's default (1). |
| 74 | regs.point_size = 1.0f; | 79 | regs.point_size = 1.0f; |
| @@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 86 | regs.rt_separate_frag_data = 1; | 91 | regs.rt_separate_frag_data = 1; |
| 87 | } | 92 | } |
| 88 | 93 | ||
| 94 | #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) | ||
| 95 | |||
| 96 | void Maxwell3D::InitDirtySettings() { | ||
| 97 | const auto set_block = [this](const u32 start, const u32 range, const u8 position) { | ||
| 98 | const auto start_itr = dirty_pointers.begin() + start; | ||
| 99 | const auto end_itr = start_itr + range; | ||
| 100 | std::fill(start_itr, end_itr, position); | ||
| 101 | }; | ||
| 102 | dirty.regs.fill(true); | ||
| 103 | |||
| 104 | // Init Render Targets | ||
| 105 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | ||
| 106 | constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); | ||
| 107 | constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; | ||
| 108 | u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); | ||
| 109 | for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { | ||
| 110 | set_block(rt_reg, registers_per_rt, rt_dirty_reg); | ||
| 111 | rt_dirty_reg++; | ||
| 112 | } | ||
| 113 | constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); | ||
| 114 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; | ||
| 115 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag; | ||
| 116 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag; | ||
| 117 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | ||
| 118 | constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta); | ||
| 119 | set_block(zeta_reg, registers_in_zeta, depth_buffer_flag); | ||
| 120 | |||
| 121 | // Init Vertex Arrays | ||
| 122 | constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); | ||
| 123 | constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); | ||
| 124 | constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; | ||
| 125 | u32 va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 126 | u32 vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 127 | for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; | ||
| 128 | vertex_reg += vertex_array_size) { | ||
| 129 | set_block(vertex_reg, 3, va_reg); | ||
| 130 | // The divisor concerns vertex array instances | ||
| 131 | dirty_pointers[vertex_reg + 3] = vi_reg; | ||
| 132 | va_reg++; | ||
| 133 | vi_reg++; | ||
| 134 | } | ||
| 135 | constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); | ||
| 136 | constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); | ||
| 137 | constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; | ||
| 138 | va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 139 | for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; | ||
| 140 | vertex_reg += vertex_limit_size) { | ||
| 141 | set_block(vertex_reg, vertex_limit_size, va_reg); | ||
| 142 | va_reg++; | ||
| 143 | } | ||
| 144 | constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); | ||
| 145 | constexpr u32 vertex_instance_size = | ||
| 146 | sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); | ||
| 147 | constexpr u32 vertex_instance_end = | ||
| 148 | vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; | ||
| 149 | vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 150 | for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; | ||
| 151 | vertex_reg += vertex_instance_size) { | ||
| 152 | set_block(vertex_reg, vertex_instance_size, vi_reg); | ||
| 153 | vi_reg++; | ||
| 154 | } | ||
| 155 | set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), | ||
| 156 | DIRTY_REGS_POS(vertex_attrib_format)); | ||
| 157 | |||
| 158 | // Init Shaders | ||
| 159 | constexpr u32 shader_registers_count = | ||
| 160 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 161 | set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, | ||
| 162 | DIRTY_REGS_POS(shaders)); | ||
| 163 | |||
| 164 | // State | ||
| 165 | |||
| 166 | // Viewport | ||
| 167 | constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); | ||
| 168 | constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); | ||
| 169 | constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); | ||
| 170 | set_block(viewport_start, viewport_size, viewport_dirty_reg); | ||
| 171 | constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control); | ||
| 172 | constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32); | ||
| 173 | set_block(view_volume_start, view_volume_size, viewport_dirty_reg); | ||
| 174 | |||
| 175 | // Viewport transformation | ||
| 176 | constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform); | ||
| 177 | constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32); | ||
| 178 | set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform)); | ||
| 179 | |||
| 180 | // Cullmode | ||
| 181 | constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull); | ||
| 182 | constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32); | ||
| 183 | set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode)); | ||
| 184 | |||
| 185 | // Screen y control | ||
| 186 | dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control); | ||
| 187 | |||
| 188 | // Primitive Restart | ||
| 189 | constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart); | ||
| 190 | constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32); | ||
| 191 | set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); | ||
| 192 | |||
| 193 | // Depth Test | ||
| 194 | constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); | ||
| 195 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; | ||
| 196 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; | ||
| 197 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; | ||
| 198 | |||
| 199 | // Stencil Test | ||
| 200 | constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test); | ||
| 201 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg; | ||
| 202 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg; | ||
| 203 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg; | ||
| 204 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg; | ||
| 205 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg; | ||
| 206 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg; | ||
| 207 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg; | ||
| 208 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg; | ||
| 209 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg; | ||
| 210 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg; | ||
| 211 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg; | ||
| 212 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg; | ||
| 213 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg; | ||
| 214 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg; | ||
| 215 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg; | ||
| 216 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; | ||
| 217 | |||
| 218 | // Color Mask | ||
| 219 | constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); | ||
| 220 | dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; | ||
| 221 | set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), | ||
| 222 | color_mask_dirty_reg); | ||
| 223 | // Blend State | ||
| 224 | constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); | ||
| 225 | set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), | ||
| 226 | blend_state_dirty_reg); | ||
| 227 | dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; | ||
| 228 | set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg); | ||
| 229 | set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32), | ||
| 230 | blend_state_dirty_reg); | ||
| 231 | |||
| 232 | // Scissor State | ||
| 233 | constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); | ||
| 234 | set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), | ||
| 235 | scissor_test_dirty_reg); | ||
| 236 | |||
| 237 | // Polygon Offset | ||
| 238 | constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); | ||
| 239 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; | ||
| 240 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; | ||
| 241 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; | ||
| 242 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg; | ||
| 243 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg; | ||
| 244 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; | ||
| 245 | } | ||
| 246 | |||
| 89 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | 247 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { |
| 90 | // Reset the current macro. | 248 | // Reset the current macro. |
| 91 | executing_macro = 0; | 249 | executing_macro = 0; |
| @@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 108 | 266 | ||
| 109 | const u32 method = method_call.method; | 267 | const u32 method = method_call.method; |
| 110 | 268 | ||
| 269 | if (method == cb_data_state.current) { | ||
| 270 | regs.reg_array[method] = method_call.argument; | ||
| 271 | ProcessCBData(method_call.argument); | ||
| 272 | return; | ||
| 273 | } else if (cb_data_state.current != null_cb_data) { | ||
| 274 | FinishCBData(); | ||
| 275 | } | ||
| 276 | |||
| 111 | // It is an error to write to a register other than the current macro's ARG register before it | 277 | // It is an error to write to a register other than the current macro's ARG register before it |
| 112 | // has finished execution. | 278 | // has finished execution. |
| 113 | if (executing_macro != 0) { | 279 | if (executing_macro != 0) { |
| @@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 143 | 309 | ||
| 144 | if (regs.reg_array[method] != method_call.argument) { | 310 | if (regs.reg_array[method] != method_call.argument) { |
| 145 | regs.reg_array[method] = method_call.argument; | 311 | regs.reg_array[method] = method_call.argument; |
| 146 | // Color buffers | 312 | const std::size_t dirty_reg = dirty_pointers[method]; |
| 147 | constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); | 313 | if (dirty_reg) { |
| 148 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | 314 | dirty.regs[dirty_reg] = true; |
| 149 | if (method >= first_rt_reg && | 315 | if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && |
| 150 | method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { | 316 | dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { |
| 151 | const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; | 317 | dirty.vertex_array_buffers = true; |
| 152 | dirty_flags.color_buffer.set(rt_index); | 318 | } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && |
| 153 | } | 319 | dirty_reg < DIRTY_REGS_POS(vertex_instances)) { |
| 154 | 320 | dirty.vertex_instances = true; | |
| 155 | // Zeta buffer | 321 | } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && |
| 156 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | 322 | dirty_reg < DIRTY_REGS_POS(render_settings)) { |
| 157 | if (method == MAXWELL3D_REG_INDEX(zeta_enable) || | 323 | dirty.render_settings = true; |
| 158 | method == MAXWELL3D_REG_INDEX(zeta_width) || | 324 | } |
| 159 | method == MAXWELL3D_REG_INDEX(zeta_height) || | ||
| 160 | (method >= MAXWELL3D_REG_INDEX(zeta) && | ||
| 161 | method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { | ||
| 162 | dirty_flags.zeta_buffer = true; | ||
| 163 | } | ||
| 164 | |||
| 165 | // Shader | ||
| 166 | constexpr u32 shader_registers_count = | ||
| 167 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 168 | if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && | ||
| 169 | method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { | ||
| 170 | dirty_flags.shaders = true; | ||
| 171 | } | ||
| 172 | |||
| 173 | // Vertex format | ||
| 174 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | ||
| 175 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | ||
| 176 | dirty_flags.vertex_attrib_format = true; | ||
| 177 | } | ||
| 178 | |||
| 179 | // Vertex buffer | ||
| 180 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 181 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) { | ||
| 182 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 183 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 184 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) { | ||
| 185 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 186 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 187 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) { | ||
| 188 | dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 189 | } | 325 | } |
| 190 | } | 326 | } |
| 191 | 327 | ||
| @@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 214 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): | 350 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): |
| 215 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): | 351 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): |
| 216 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { | 352 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { |
| 217 | ProcessCBData(method_call.argument); | 353 | StartCBData(method); |
| 218 | break; | 354 | break; |
| 219 | } | 355 | } |
| 220 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { | 356 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { |
| @@ -261,7 +397,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 261 | const bool is_last_call = method_call.IsLastCall(); | 397 | const bool is_last_call = method_call.IsLastCall(); |
| 262 | upload_state.ProcessData(method_call.argument, is_last_call); | 398 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 263 | if (is_last_call) { | 399 | if (is_last_call) { |
| 264 | dirty_flags.OnMemoryWrite(); | 400 | dirty.OnMemoryWrite(); |
| 265 | } | 401 | } |
| 266 | break; | 402 | break; |
| 267 | } | 403 | } |
| @@ -333,7 +469,6 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 333 | query_result.timestamp = system.CoreTiming().GetTicks(); | 469 | query_result.timestamp = system.CoreTiming().GetTicks(); |
| 334 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | 470 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); |
| 335 | } | 471 | } |
| 336 | dirty_flags.OnMemoryWrite(); | ||
| 337 | break; | 472 | break; |
| 338 | } | 473 | } |
| 339 | default: | 474 | default: |
| @@ -405,23 +540,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { | |||
| 405 | } | 540 | } |
| 406 | 541 | ||
| 407 | void Maxwell3D::ProcessCBData(u32 value) { | 542 | void Maxwell3D::ProcessCBData(u32 value) { |
| 543 | const u32 id = cb_data_state.id; | ||
| 544 | cb_data_state.buffer[id][cb_data_state.counter] = value; | ||
| 545 | // Increment the current buffer position. | ||
| 546 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | ||
| 547 | cb_data_state.counter++; | ||
| 548 | } | ||
| 549 | |||
| 550 | void Maxwell3D::StartCBData(u32 method) { | ||
| 551 | constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]); | ||
| 552 | cb_data_state.start_pos = regs.const_buffer.cb_pos; | ||
| 553 | cb_data_state.id = method - first_cb_data; | ||
| 554 | cb_data_state.current = method; | ||
| 555 | cb_data_state.counter = 0; | ||
| 556 | ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); | ||
| 557 | } | ||
| 558 | |||
| 559 | void Maxwell3D::FinishCBData() { | ||
| 408 | // Write the input value to the current const buffer at the current position. | 560 | // Write the input value to the current const buffer at the current position. |
| 409 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); | 561 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); |
| 410 | ASSERT(buffer_address != 0); | 562 | ASSERT(buffer_address != 0); |
| 411 | 563 | ||
| 412 | // Don't allow writing past the end of the buffer. | 564 | // Don't allow writing past the end of the buffer. |
| 413 | ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); | 565 | ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size); |
| 414 | 566 | ||
| 415 | const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; | 567 | const GPUVAddr address{buffer_address + cb_data_state.start_pos}; |
| 568 | const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos; | ||
| 416 | 569 | ||
| 417 | u8* ptr{memory_manager.GetPointer(address)}; | 570 | const u32 id = cb_data_state.id; |
| 418 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | 571 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); |
| 419 | memory_manager.Write<u32>(address, value); | 572 | dirty.OnMemoryWrite(); |
| 420 | 573 | ||
| 421 | dirty_flags.OnMemoryWrite(); | 574 | cb_data_state.id = null_cb_data; |
| 422 | 575 | cb_data_state.current = null_cb_data; | |
| 423 | // Increment the current buffer position. | ||
| 424 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | ||
| 425 | } | 576 | } |
| 426 | 577 | ||
| 427 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 578 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 8d15c8a48..ac300bf76 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1124,23 +1124,77 @@ public: | |||
| 1124 | 1124 | ||
| 1125 | State state{}; | 1125 | State state{}; |
| 1126 | 1126 | ||
| 1127 | struct DirtyFlags { | 1127 | struct DirtyRegs { |
| 1128 | std::bitset<8> color_buffer{0xFF}; | 1128 | static constexpr std::size_t NUM_REGS = 256; |
| 1129 | std::bitset<32> vertex_array{0xFFFFFFFF}; | 1129 | union { |
| 1130 | struct { | ||
| 1131 | bool null_dirty; | ||
| 1132 | |||
| 1133 | // Vertex Attributes | ||
| 1134 | bool vertex_attrib_format; | ||
| 1135 | |||
| 1136 | // Vertex Arrays | ||
| 1137 | std::array<bool, 32> vertex_array; | ||
| 1138 | |||
| 1139 | bool vertex_array_buffers; | ||
| 1140 | |||
| 1141 | // Vertex Instances | ||
| 1142 | std::array<bool, 32> vertex_instance; | ||
| 1143 | |||
| 1144 | bool vertex_instances; | ||
| 1145 | |||
| 1146 | // Render Targets | ||
| 1147 | std::array<bool, 8> render_target; | ||
| 1148 | bool depth_buffer; | ||
| 1149 | |||
| 1150 | bool render_settings; | ||
| 1151 | |||
| 1152 | // Shaders | ||
| 1153 | bool shaders; | ||
| 1154 | |||
| 1155 | // Rasterizer State | ||
| 1156 | bool viewport; | ||
| 1157 | bool clip_coefficient; | ||
| 1158 | bool cull_mode; | ||
| 1159 | bool primitive_restart; | ||
| 1160 | bool depth_test; | ||
| 1161 | bool stencil_test; | ||
| 1162 | bool blend_state; | ||
| 1163 | bool scissor_test; | ||
| 1164 | bool transform_feedback; | ||
| 1165 | bool color_mask; | ||
| 1166 | bool polygon_offset; | ||
| 1130 | 1167 | ||
| 1131 | bool vertex_attrib_format = true; | 1168 | // Complementary |
| 1132 | bool zeta_buffer = true; | 1169 | bool viewport_transform; |
| 1133 | bool shaders = true; | 1170 | bool screen_y_control; |
| 1171 | |||
| 1172 | bool memory_general; | ||
| 1173 | }; | ||
| 1174 | std::array<bool, NUM_REGS> regs; | ||
| 1175 | }; | ||
| 1176 | |||
| 1177 | void ResetVertexArrays() { | ||
| 1178 | vertex_array.fill(true); | ||
| 1179 | vertex_array_buffers = true; | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | void ResetRenderTargets() { | ||
| 1183 | depth_buffer = true; | ||
| 1184 | render_target.fill(true); | ||
| 1185 | render_settings = true; | ||
| 1186 | } | ||
| 1134 | 1187 | ||
| 1135 | void OnMemoryWrite() { | 1188 | void OnMemoryWrite() { |
| 1136 | zeta_buffer = true; | ||
| 1137 | shaders = true; | 1189 | shaders = true; |
| 1138 | color_buffer.set(); | 1190 | memory_general = true; |
| 1139 | vertex_array.set(); | 1191 | ResetRenderTargets(); |
| 1192 | ResetVertexArrays(); | ||
| 1140 | } | 1193 | } |
| 1141 | }; | ||
| 1142 | 1194 | ||
| 1143 | DirtyFlags dirty_flags; | 1195 | } dirty{}; |
| 1196 | |||
| 1197 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1144 | 1198 | ||
| 1145 | /// Reads a register value located at the input method address | 1199 | /// Reads a register value located at the input method address |
| 1146 | u32 GetRegisterValue(u32 method) const; | 1200 | u32 GetRegisterValue(u32 method) const; |
| @@ -1192,6 +1246,15 @@ private: | |||
| 1192 | /// Interpreter for the macro codes uploaded to the GPU. | 1246 | /// Interpreter for the macro codes uploaded to the GPU. |
| 1193 | MacroInterpreter macro_interpreter; | 1247 | MacroInterpreter macro_interpreter; |
| 1194 | 1248 | ||
| 1249 | static constexpr u32 null_cb_data = 0xFFFFFFFF; | ||
| 1250 | struct { | ||
| 1251 | std::array<std::array<u32, 0x4000>, 16> buffer; | ||
| 1252 | u32 current{null_cb_data}; | ||
| 1253 | u32 id{null_cb_data}; | ||
| 1254 | u32 start_pos{}; | ||
| 1255 | u32 counter{}; | ||
| 1256 | } cb_data_state; | ||
| 1257 | |||
| 1195 | Upload::State upload_state; | 1258 | Upload::State upload_state; |
| 1196 | 1259 | ||
| 1197 | /// Retrieves information about a specific TIC entry from the TIC buffer. | 1260 | /// Retrieves information about a specific TIC entry from the TIC buffer. |
| @@ -1200,6 +1263,8 @@ private: | |||
| 1200 | /// Retrieves information about a specific TSC entry from the TSC buffer. | 1263 | /// Retrieves information about a specific TSC entry from the TSC buffer. |
| 1201 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; | 1264 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; |
| 1202 | 1265 | ||
| 1266 | void InitDirtySettings(); | ||
| 1267 | |||
| 1203 | /** | 1268 | /** |
| 1204 | * Call a macro on this engine. | 1269 | * Call a macro on this engine. |
| 1205 | * @param method Method to call | 1270 | * @param method Method to call |
| @@ -1223,7 +1288,9 @@ private: | |||
| 1223 | void ProcessSyncPoint(); | 1288 | void ProcessSyncPoint(); |
| 1224 | 1289 | ||
| 1225 | /// Handles a write to the CB_DATA[i] register. | 1290 | /// Handles a write to the CB_DATA[i] register. |
| 1291 | void StartCBData(u32 method); | ||
| 1226 | void ProcessCBData(u32 value); | 1292 | void ProcessCBData(u32 value); |
| 1293 | void FinishCBData(); | ||
| 1227 | 1294 | ||
| 1228 | /// Handles a write to the CB_BIND register. | 1295 | /// Handles a write to the CB_BIND register. |
| 1229 | void ProcessCBBind(Regs::ShaderStage stage); | 1296 | void ProcessCBBind(Regs::ShaderStage stage); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0..b5f57e534 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. | 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. |
| 61 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 61 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 62 | 62 | ||
| 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0bb5c068c..0432a9e10 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -105,6 +105,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | |||
| 105 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 105 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| 106 | state.draw.shader_program = 0; | 106 | state.draw.shader_program = 0; |
| 107 | state.Apply(); | 107 | state.Apply(); |
| 108 | clear_framebuffer.Create(); | ||
| 108 | 109 | ||
| 109 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); | 110 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); |
| 110 | CheckExtensions(); | 111 | CheckExtensions(); |
| @@ -124,10 +125,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 124 | auto& gpu = system.GPU().Maxwell3D(); | 125 | auto& gpu = system.GPU().Maxwell3D(); |
| 125 | const auto& regs = gpu.regs; | 126 | const auto& regs = gpu.regs; |
| 126 | 127 | ||
| 127 | if (!gpu.dirty_flags.vertex_attrib_format) { | 128 | if (!gpu.dirty.vertex_attrib_format) { |
| 128 | return state.draw.vertex_array; | 129 | return state.draw.vertex_array; |
| 129 | } | 130 | } |
| 130 | gpu.dirty_flags.vertex_attrib_format = false; | 131 | gpu.dirty.vertex_attrib_format = false; |
| 131 | 132 | ||
| 132 | MICROPROFILE_SCOPE(OpenGL_VAO); | 133 | MICROPROFILE_SCOPE(OpenGL_VAO); |
| 133 | 134 | ||
| @@ -181,7 +182,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 181 | } | 182 | } |
| 182 | 183 | ||
| 183 | // Rebinding the VAO invalidates the vertex buffer bindings. | 184 | // Rebinding the VAO invalidates the vertex buffer bindings. |
| 184 | gpu.dirty_flags.vertex_array.set(); | 185 | gpu.dirty.ResetVertexArrays(); |
| 185 | 186 | ||
| 186 | state.draw.vertex_array = vao_entry.handle; | 187 | state.draw.vertex_array = vao_entry.handle; |
| 187 | return vao_entry.handle; | 188 | return vao_entry.handle; |
| @@ -189,17 +190,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 189 | 190 | ||
| 190 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | 191 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { |
| 191 | auto& gpu = system.GPU().Maxwell3D(); | 192 | auto& gpu = system.GPU().Maxwell3D(); |
| 192 | const auto& regs = gpu.regs; | 193 | if (!gpu.dirty.vertex_array_buffers) |
| 193 | |||
| 194 | if (gpu.dirty_flags.vertex_array.none()) | ||
| 195 | return; | 194 | return; |
| 195 | gpu.dirty.vertex_array_buffers = false; | ||
| 196 | |||
| 197 | const auto& regs = gpu.regs; | ||
| 196 | 198 | ||
| 197 | MICROPROFILE_SCOPE(OpenGL_VB); | 199 | MICROPROFILE_SCOPE(OpenGL_VB); |
| 198 | 200 | ||
| 199 | // Upload all guest vertex arrays sequentially to our buffer | 201 | // Upload all guest vertex arrays sequentially to our buffer |
| 200 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 202 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 201 | if (!gpu.dirty_flags.vertex_array[index]) | 203 | if (!gpu.dirty.vertex_array[index]) |
| 202 | continue; | 204 | continue; |
| 205 | gpu.dirty.vertex_array[index] = false; | ||
| 206 | gpu.dirty.vertex_instance[index] = false; | ||
| 203 | 207 | ||
| 204 | const auto& vertex_array = regs.vertex_array[index]; | 208 | const auto& vertex_array = regs.vertex_array[index]; |
| 205 | if (!vertex_array.IsEnabled()) | 209 | if (!vertex_array.IsEnabled()) |
| @@ -224,8 +228,32 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 224 | glVertexArrayBindingDivisor(vao, index, 0); | 228 | glVertexArrayBindingDivisor(vao, index, 0); |
| 225 | } | 229 | } |
| 226 | } | 230 | } |
| 231 | } | ||
| 232 | |||
| 233 | void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { | ||
| 234 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 235 | |||
| 236 | if (!gpu.dirty.vertex_instances) | ||
| 237 | return; | ||
| 238 | gpu.dirty.vertex_instances = false; | ||
| 227 | 239 | ||
| 228 | gpu.dirty_flags.vertex_array.reset(); | 240 | const auto& regs = gpu.regs; |
| 241 | // Upload all guest vertex arrays sequentially to our buffer | ||
| 242 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 243 | if (!gpu.dirty.vertex_instance[index]) | ||
| 244 | continue; | ||
| 245 | |||
| 246 | gpu.dirty.vertex_instance[index] = false; | ||
| 247 | |||
| 248 | if (regs.instanced_arrays.IsInstancingEnabled(index) && | ||
| 249 | regs.vertex_array[index].divisor != 0) { | ||
| 250 | // Enable vertex buffer instancing with the specified divisor. | ||
| 251 | glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor); | ||
| 252 | } else { | ||
| 253 | // Disable the vertex buffer instancing. | ||
| 254 | glVertexArrayBindingDivisor(vao, index, 0); | ||
| 255 | } | ||
| 256 | } | ||
| 229 | } | 257 | } |
| 230 | 258 | ||
| 231 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { | 259 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { |
| @@ -341,7 +369,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 341 | 369 | ||
| 342 | SyncClipEnabled(clip_distances); | 370 | SyncClipEnabled(clip_distances); |
| 343 | 371 | ||
| 344 | gpu.dirty_flags.shaders = false; | 372 | gpu.dirty.shaders = false; |
| 345 | } | 373 | } |
| 346 | 374 | ||
| 347 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 375 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| @@ -424,13 +452,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 424 | 452 | ||
| 425 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, | 453 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, |
| 426 | single_color_target}; | 454 | single_color_target}; |
| 427 | if (fb_config_state == current_framebuffer_config_state && | 455 | if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) { |
| 428 | gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { | ||
| 429 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or | 456 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or |
| 430 | // single color targets). This is done because the guest registers may not change but the | 457 | // single color targets). This is done because the guest registers may not change but the |
| 431 | // host framebuffer may contain different attachments | 458 | // host framebuffer may contain different attachments |
| 432 | return current_depth_stencil_usage; | 459 | return current_depth_stencil_usage; |
| 433 | } | 460 | } |
| 461 | gpu.dirty.render_settings = false; | ||
| 434 | current_framebuffer_config_state = fb_config_state; | 462 | current_framebuffer_config_state = fb_config_state; |
| 435 | 463 | ||
| 436 | texture_cache.GuardRenderTargets(true); | 464 | texture_cache.GuardRenderTargets(true); |
| @@ -519,13 +547,65 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 519 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; | 547 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; |
| 520 | } | 548 | } |
| 521 | 549 | ||
| 550 | void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 551 | bool using_depth_fb, bool using_stencil_fb) { | ||
| 552 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 553 | const auto& regs = gpu.regs; | ||
| 554 | |||
| 555 | texture_cache.GuardRenderTargets(true); | ||
| 556 | View color_surface{}; | ||
| 557 | if (using_color_fb) { | ||
| 558 | color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); | ||
| 559 | } | ||
| 560 | View depth_surface{}; | ||
| 561 | if (using_depth_fb || using_stencil_fb) { | ||
| 562 | depth_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 563 | } | ||
| 564 | texture_cache.GuardRenderTargets(false); | ||
| 565 | |||
| 566 | current_state.draw.draw_framebuffer = clear_framebuffer.handle; | ||
| 567 | current_state.ApplyFramebufferState(); | ||
| 568 | |||
| 569 | if (color_surface) { | ||
| 570 | color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); | ||
| 571 | } else { | ||
| 572 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 573 | } | ||
| 574 | |||
| 575 | if (depth_surface) { | ||
| 576 | const auto& params = depth_surface->GetSurfaceParams(); | ||
| 577 | switch (params.type) { | ||
| 578 | case VideoCore::Surface::SurfaceType::Depth: { | ||
| 579 | depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 580 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 581 | break; | ||
| 582 | } | ||
| 583 | case VideoCore::Surface::SurfaceType::DepthStencil: { | ||
| 584 | depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 585 | break; | ||
| 586 | } | ||
| 587 | default: { UNIMPLEMENTED(); } | ||
| 588 | } | ||
| 589 | } else { | ||
| 590 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 591 | 0); | ||
| 592 | } | ||
| 593 | } | ||
| 594 | |||
| 522 | void RasterizerOpenGL::Clear() { | 595 | void RasterizerOpenGL::Clear() { |
| 523 | const auto& regs = system.GPU().Maxwell3D().regs; | 596 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 524 | bool use_color{}; | 597 | bool use_color{}; |
| 525 | bool use_depth{}; | 598 | bool use_depth{}; |
| 526 | bool use_stencil{}; | 599 | bool use_stencil{}; |
| 527 | 600 | ||
| 528 | OpenGLState clear_state; | 601 | OpenGLState prev_state{OpenGLState::GetCurState()}; |
| 602 | SCOPE_EXIT({ | ||
| 603 | prev_state.AllDirty(); | ||
| 604 | prev_state.Apply(); | ||
| 605 | }); | ||
| 606 | |||
| 607 | OpenGLState clear_state{OpenGLState::GetCurState()}; | ||
| 608 | clear_state.SetDefaultViewports(); | ||
| 529 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 609 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 530 | regs.clear_buffers.A) { | 610 | regs.clear_buffers.A) { |
| 531 | use_color = true; | 611 | use_color = true; |
| @@ -545,6 +625,7 @@ void RasterizerOpenGL::Clear() { | |||
| 545 | // true. | 625 | // true. |
| 546 | clear_state.depth.test_enabled = true; | 626 | clear_state.depth.test_enabled = true; |
| 547 | clear_state.depth.test_func = GL_ALWAYS; | 627 | clear_state.depth.test_func = GL_ALWAYS; |
| 628 | clear_state.depth.write_mask = GL_TRUE; | ||
| 548 | } | 629 | } |
| 549 | if (regs.clear_buffers.S) { | 630 | if (regs.clear_buffers.S) { |
| 550 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); | 631 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); |
| @@ -581,8 +662,9 @@ void RasterizerOpenGL::Clear() { | |||
| 581 | return; | 662 | return; |
| 582 | } | 663 | } |
| 583 | 664 | ||
| 584 | const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( | 665 | ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); |
| 585 | clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); | 666 | |
| 667 | SyncViewport(clear_state); | ||
| 586 | if (regs.clear_flags.scissor) { | 668 | if (regs.clear_flags.scissor) { |
| 587 | SyncScissorTest(clear_state); | 669 | SyncScissorTest(clear_state); |
| 588 | } | 670 | } |
| @@ -591,21 +673,18 @@ void RasterizerOpenGL::Clear() { | |||
| 591 | clear_state.EmulateViewportWithScissor(); | 673 | clear_state.EmulateViewportWithScissor(); |
| 592 | } | 674 | } |
| 593 | 675 | ||
| 594 | clear_state.ApplyColorMask(); | 676 | clear_state.AllDirty(); |
| 595 | clear_state.ApplyDepth(); | 677 | clear_state.Apply(); |
| 596 | clear_state.ApplyStencilTest(); | ||
| 597 | clear_state.ApplyViewport(); | ||
| 598 | clear_state.ApplyFramebufferState(); | ||
| 599 | 678 | ||
| 600 | if (use_color) { | 679 | if (use_color) { |
| 601 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | 680 | glClearBufferfv(GL_COLOR, 0, regs.clear_color); |
| 602 | } | 681 | } |
| 603 | 682 | ||
| 604 | if (clear_depth && clear_stencil) { | 683 | if (use_depth && use_stencil) { |
| 605 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); | 684 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); |
| 606 | } else if (clear_depth) { | 685 | } else if (use_depth) { |
| 607 | glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth); | 686 | glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth); |
| 608 | } else if (clear_stencil) { | 687 | } else if (use_stencil) { |
| 609 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); | 688 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); |
| 610 | } | 689 | } |
| 611 | } | 690 | } |
| @@ -661,6 +740,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 661 | 740 | ||
| 662 | // Upload vertex and index data. | 741 | // Upload vertex and index data. |
| 663 | SetupVertexBuffer(vao); | 742 | SetupVertexBuffer(vao); |
| 743 | SetupVertexInstances(vao); | ||
| 664 | const GLintptr index_buffer_offset = SetupIndexBuffer(); | 744 | const GLintptr index_buffer_offset = SetupIndexBuffer(); |
| 665 | 745 | ||
| 666 | // Setup draw parameters. It will automatically choose what glDraw* method to use. | 746 | // Setup draw parameters. It will automatically choose what glDraw* method to use. |
| @@ -687,7 +767,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 687 | 767 | ||
| 688 | if (invalidate) { | 768 | if (invalidate) { |
| 689 | // As all cached buffers are invalidated, we need to recheck their state. | 769 | // As all cached buffers are invalidated, we need to recheck their state. |
| 690 | gpu.dirty_flags.vertex_array.set(); | 770 | gpu.dirty.ResetVertexArrays(); |
| 691 | } | 771 | } |
| 692 | 772 | ||
| 693 | shader_program_manager->ApplyTo(state); | 773 | shader_program_manager->ApplyTo(state); |
| @@ -700,6 +780,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 700 | params.DispatchDraw(); | 780 | params.DispatchDraw(); |
| 701 | 781 | ||
| 702 | accelerate_draw = AccelDraw::Disabled; | 782 | accelerate_draw = AccelDraw::Disabled; |
| 783 | gpu.dirty.memory_general = false; | ||
| 703 | } | 784 | } |
| 704 | 785 | ||
| 705 | void RasterizerOpenGL::FlushAll() {} | 786 | void RasterizerOpenGL::FlushAll() {} |
| @@ -907,10 +988,11 @@ void RasterizerOpenGL::SyncClipCoef() { | |||
| 907 | } | 988 | } |
| 908 | 989 | ||
| 909 | void RasterizerOpenGL::SyncCullMode() { | 990 | void RasterizerOpenGL::SyncCullMode() { |
| 910 | const auto& regs = system.GPU().Maxwell3D().regs; | 991 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 911 | 992 | ||
| 912 | state.cull.enabled = regs.cull.enabled != 0; | 993 | const auto& regs = maxwell3d.regs; |
| 913 | 994 | ||
| 995 | state.cull.enabled = regs.cull.enabled != 0; | ||
| 914 | if (state.cull.enabled) { | 996 | if (state.cull.enabled) { |
| 915 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); | 997 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); |
| 916 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); | 998 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); |
| @@ -943,16 +1025,21 @@ void RasterizerOpenGL::SyncDepthTestState() { | |||
| 943 | state.depth.test_enabled = regs.depth_test_enable != 0; | 1025 | state.depth.test_enabled = regs.depth_test_enable != 0; |
| 944 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; | 1026 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; |
| 945 | 1027 | ||
| 946 | if (!state.depth.test_enabled) | 1028 | if (!state.depth.test_enabled) { |
| 947 | return; | 1029 | return; |
| 1030 | } | ||
| 948 | 1031 | ||
| 949 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); | 1032 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); |
| 950 | } | 1033 | } |
| 951 | 1034 | ||
| 952 | void RasterizerOpenGL::SyncStencilTestState() { | 1035 | void RasterizerOpenGL::SyncStencilTestState() { |
| 953 | const auto& regs = system.GPU().Maxwell3D().regs; | 1036 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 954 | state.stencil.test_enabled = regs.stencil_enable != 0; | 1037 | if (!maxwell3d.dirty.stencil_test) { |
| 1038 | return; | ||
| 1039 | } | ||
| 1040 | const auto& regs = maxwell3d.regs; | ||
| 955 | 1041 | ||
| 1042 | state.stencil.test_enabled = regs.stencil_enable != 0; | ||
| 956 | if (!regs.stencil_enable) { | 1043 | if (!regs.stencil_enable) { |
| 957 | return; | 1044 | return; |
| 958 | } | 1045 | } |
| @@ -981,10 +1068,17 @@ void RasterizerOpenGL::SyncStencilTestState() { | |||
| 981 | state.stencil.back.action_depth_fail = GL_KEEP; | 1068 | state.stencil.back.action_depth_fail = GL_KEEP; |
| 982 | state.stencil.back.action_depth_pass = GL_KEEP; | 1069 | state.stencil.back.action_depth_pass = GL_KEEP; |
| 983 | } | 1070 | } |
| 1071 | state.MarkDirtyStencilState(); | ||
| 1072 | maxwell3d.dirty.stencil_test = false; | ||
| 984 | } | 1073 | } |
| 985 | 1074 | ||
| 986 | void RasterizerOpenGL::SyncColorMask() { | 1075 | void RasterizerOpenGL::SyncColorMask() { |
| 987 | const auto& regs = system.GPU().Maxwell3D().regs; | 1076 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1077 | if (!maxwell3d.dirty.color_mask) { | ||
| 1078 | return; | ||
| 1079 | } | ||
| 1080 | const auto& regs = maxwell3d.regs; | ||
| 1081 | |||
| 988 | const std::size_t count = | 1082 | const std::size_t count = |
| 989 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; | 1083 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; |
| 990 | for (std::size_t i = 0; i < count; i++) { | 1084 | for (std::size_t i = 0; i < count; i++) { |
| @@ -995,6 +1089,9 @@ void RasterizerOpenGL::SyncColorMask() { | |||
| 995 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; | 1089 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; |
| 996 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; | 1090 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; |
| 997 | } | 1091 | } |
| 1092 | |||
| 1093 | state.MarkDirtyColorMask(); | ||
| 1094 | maxwell3d.dirty.color_mask = false; | ||
| 998 | } | 1095 | } |
| 999 | 1096 | ||
| 1000 | void RasterizerOpenGL::SyncMultiSampleState() { | 1097 | void RasterizerOpenGL::SyncMultiSampleState() { |
| @@ -1009,7 +1106,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() { | |||
| 1009 | } | 1106 | } |
| 1010 | 1107 | ||
| 1011 | void RasterizerOpenGL::SyncBlendState() { | 1108 | void RasterizerOpenGL::SyncBlendState() { |
| 1012 | const auto& regs = system.GPU().Maxwell3D().regs; | 1109 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1110 | if (!maxwell3d.dirty.blend_state) { | ||
| 1111 | return; | ||
| 1112 | } | ||
| 1113 | const auto& regs = maxwell3d.regs; | ||
| 1013 | 1114 | ||
| 1014 | state.blend_color.red = regs.blend_color.r; | 1115 | state.blend_color.red = regs.blend_color.r; |
| 1015 | state.blend_color.green = regs.blend_color.g; | 1116 | state.blend_color.green = regs.blend_color.g; |
| @@ -1032,6 +1133,8 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1032 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | 1133 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { |
| 1033 | state.blend[i].enabled = false; | 1134 | state.blend[i].enabled = false; |
| 1034 | } | 1135 | } |
| 1136 | maxwell3d.dirty.blend_state = false; | ||
| 1137 | state.MarkDirtyBlendState(); | ||
| 1035 | return; | 1138 | return; |
| 1036 | } | 1139 | } |
| 1037 | 1140 | ||
| @@ -1048,6 +1151,9 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1048 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); | 1151 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); |
| 1049 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); | 1152 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); |
| 1050 | } | 1153 | } |
| 1154 | |||
| 1155 | state.MarkDirtyBlendState(); | ||
| 1156 | maxwell3d.dirty.blend_state = false; | ||
| 1051 | } | 1157 | } |
| 1052 | 1158 | ||
| 1053 | void RasterizerOpenGL::SyncLogicOpState() { | 1159 | void RasterizerOpenGL::SyncLogicOpState() { |
| @@ -1099,13 +1205,21 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1099 | } | 1205 | } |
| 1100 | 1206 | ||
| 1101 | void RasterizerOpenGL::SyncPolygonOffset() { | 1207 | void RasterizerOpenGL::SyncPolygonOffset() { |
| 1102 | const auto& regs = system.GPU().Maxwell3D().regs; | 1208 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1209 | if (!maxwell3d.dirty.polygon_offset) { | ||
| 1210 | return; | ||
| 1211 | } | ||
| 1212 | const auto& regs = maxwell3d.regs; | ||
| 1213 | |||
| 1103 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; | 1214 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; |
| 1104 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; | 1215 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; |
| 1105 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; | 1216 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; |
| 1106 | state.polygon_offset.units = regs.polygon_offset_units; | 1217 | state.polygon_offset.units = regs.polygon_offset_units; |
| 1107 | state.polygon_offset.factor = regs.polygon_offset_factor; | 1218 | state.polygon_offset.factor = regs.polygon_offset_factor; |
| 1108 | state.polygon_offset.clamp = regs.polygon_offset_clamp; | 1219 | state.polygon_offset.clamp = regs.polygon_offset_clamp; |
| 1220 | |||
| 1221 | state.MarkDirtyPolygonOffset(); | ||
| 1222 | maxwell3d.dirty.polygon_offset = false; | ||
| 1109 | } | 1223 | } |
| 1110 | 1224 | ||
| 1111 | void RasterizerOpenGL::SyncAlphaTest() { | 1225 | void RasterizerOpenGL::SyncAlphaTest() { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 40b571d58..ef34d3f54 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -108,6 +108,9 @@ private: | |||
| 108 | OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true, | 108 | OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true, |
| 109 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); | 109 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); |
| 110 | 110 | ||
| 111 | void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 112 | bool using_depth_fb, bool using_stencil_fb); | ||
| 113 | |||
| 111 | /// Configures the current constbuffers to use for the draw command. | 114 | /// Configures the current constbuffers to use for the draw command. |
| 112 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 115 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 113 | const Shader& shader); | 116 | const Shader& shader); |
| @@ -216,6 +219,7 @@ private: | |||
| 216 | GLuint SetupVertexFormat(); | 219 | GLuint SetupVertexFormat(); |
| 217 | 220 | ||
| 218 | void SetupVertexBuffer(GLuint vao); | 221 | void SetupVertexBuffer(GLuint vao); |
| 222 | void SetupVertexInstances(GLuint vao); | ||
| 219 | 223 | ||
| 220 | GLintptr SetupIndexBuffer(); | 224 | GLintptr SetupIndexBuffer(); |
| 221 | 225 | ||
| @@ -226,6 +230,8 @@ private: | |||
| 226 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 230 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 227 | AccelDraw accelerate_draw = AccelDraw::Disabled; | 231 | AccelDraw accelerate_draw = AccelDraw::Disabled; |
| 228 | 232 | ||
| 233 | OGLFramebuffer clear_framebuffer; | ||
| 234 | |||
| 229 | using CachedPageMap = boost::icl::interval_map<u64, int>; | 235 | using CachedPageMap = boost::icl::interval_map<u64, int>; |
| 230 | CachedPageMap cached_pages; | 236 | CachedPageMap cached_pages; |
| 231 | }; | 237 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 32dd9eae7..456ba0403 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -572,7 +572,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia | |||
| 572 | } | 572 | } |
| 573 | 573 | ||
| 574 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 574 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 575 | if (!system.GPU().Maxwell3D().dirty_flags.shaders) { | 575 | if (!system.GPU().Maxwell3D().dirty.shaders) { |
| 576 | return last_shaders[static_cast<std::size_t>(program)]; | 576 | return last_shaders[static_cast<std::size_t>(program)]; |
| 577 | } | 577 | } |
| 578 | 578 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 0eae98afe..f4777d0b0 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -165,6 +165,25 @@ OpenGLState::OpenGLState() { | |||
| 165 | alpha_test.ref = 0.0f; | 165 | alpha_test.ref = 0.0f; |
| 166 | } | 166 | } |
| 167 | 167 | ||
| 168 | void OpenGLState::SetDefaultViewports() { | ||
| 169 | for (auto& item : viewports) { | ||
| 170 | item.x = 0; | ||
| 171 | item.y = 0; | ||
| 172 | item.width = 0; | ||
| 173 | item.height = 0; | ||
| 174 | item.depth_range_near = 0.0f; | ||
| 175 | item.depth_range_far = 1.0f; | ||
| 176 | item.scissor.enabled = false; | ||
| 177 | item.scissor.x = 0; | ||
| 178 | item.scissor.y = 0; | ||
| 179 | item.scissor.width = 0; | ||
| 180 | item.scissor.height = 0; | ||
| 181 | } | ||
| 182 | |||
| 183 | depth_clamp.far_plane = false; | ||
| 184 | depth_clamp.near_plane = false; | ||
| 185 | } | ||
| 186 | |||
| 168 | void OpenGLState::ApplyDefaultState() { | 187 | void OpenGLState::ApplyDefaultState() { |
| 169 | glEnable(GL_BLEND); | 188 | glEnable(GL_BLEND); |
| 170 | glDisable(GL_FRAMEBUFFER_SRGB); | 189 | glDisable(GL_FRAMEBUFFER_SRGB); |
| @@ -526,7 +545,7 @@ void OpenGLState::ApplySamplers() const { | |||
| 526 | } | 545 | } |
| 527 | } | 546 | } |
| 528 | 547 | ||
| 529 | void OpenGLState::Apply() const { | 548 | void OpenGLState::Apply() { |
| 530 | MICROPROFILE_SCOPE(OpenGL_State); | 549 | MICROPROFILE_SCOPE(OpenGL_State); |
| 531 | ApplyFramebufferState(); | 550 | ApplyFramebufferState(); |
| 532 | ApplyVertexArrayState(); | 551 | ApplyVertexArrayState(); |
| @@ -536,19 +555,31 @@ void OpenGLState::Apply() const { | |||
| 536 | ApplyPointSize(); | 555 | ApplyPointSize(); |
| 537 | ApplyFragmentColorClamp(); | 556 | ApplyFragmentColorClamp(); |
| 538 | ApplyMultisample(); | 557 | ApplyMultisample(); |
| 558 | if (dirty.color_mask) { | ||
| 559 | ApplyColorMask(); | ||
| 560 | dirty.color_mask = false; | ||
| 561 | } | ||
| 539 | ApplyDepthClamp(); | 562 | ApplyDepthClamp(); |
| 540 | ApplyColorMask(); | ||
| 541 | ApplyViewport(); | 563 | ApplyViewport(); |
| 542 | ApplyStencilTest(); | 564 | if (dirty.stencil_state) { |
| 565 | ApplyStencilTest(); | ||
| 566 | dirty.stencil_state = false; | ||
| 567 | } | ||
| 543 | ApplySRgb(); | 568 | ApplySRgb(); |
| 544 | ApplyCulling(); | 569 | ApplyCulling(); |
| 545 | ApplyDepth(); | 570 | ApplyDepth(); |
| 546 | ApplyPrimitiveRestart(); | 571 | ApplyPrimitiveRestart(); |
| 547 | ApplyBlending(); | 572 | if (dirty.blend_state) { |
| 573 | ApplyBlending(); | ||
| 574 | dirty.blend_state = false; | ||
| 575 | } | ||
| 548 | ApplyLogicOp(); | 576 | ApplyLogicOp(); |
| 549 | ApplyTextures(); | 577 | ApplyTextures(); |
| 550 | ApplySamplers(); | 578 | ApplySamplers(); |
| 551 | ApplyPolygonOffset(); | 579 | if (dirty.polygon_offset) { |
| 580 | ApplyPolygonOffset(); | ||
| 581 | dirty.polygon_offset = false; | ||
| 582 | } | ||
| 552 | ApplyAlphaTest(); | 583 | ApplyAlphaTest(); |
| 553 | } | 584 | } |
| 554 | 585 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index b0140495d..fdf9a8a12 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -195,8 +195,9 @@ public: | |||
| 195 | s_rgb_used = false; | 195 | s_rgb_used = false; |
| 196 | } | 196 | } |
| 197 | 197 | ||
| 198 | void SetDefaultViewports(); | ||
| 198 | /// Apply this state as the current OpenGL state | 199 | /// Apply this state as the current OpenGL state |
| 199 | void Apply() const; | 200 | void Apply(); |
| 200 | 201 | ||
| 201 | void ApplyFramebufferState() const; | 202 | void ApplyFramebufferState() const; |
| 202 | void ApplyVertexArrayState() const; | 203 | void ApplyVertexArrayState() const; |
| @@ -237,11 +238,41 @@ public: | |||
| 237 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test | 238 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test |
| 238 | void EmulateViewportWithScissor(); | 239 | void EmulateViewportWithScissor(); |
| 239 | 240 | ||
| 241 | void MarkDirtyBlendState() { | ||
| 242 | dirty.blend_state = true; | ||
| 243 | } | ||
| 244 | |||
| 245 | void MarkDirtyStencilState() { | ||
| 246 | dirty.stencil_state = true; | ||
| 247 | } | ||
| 248 | |||
| 249 | void MarkDirtyPolygonOffset() { | ||
| 250 | dirty.polygon_offset = true; | ||
| 251 | } | ||
| 252 | |||
| 253 | void MarkDirtyColorMask() { | ||
| 254 | dirty.color_mask = true; | ||
| 255 | } | ||
| 256 | |||
| 257 | void AllDirty() { | ||
| 258 | dirty.blend_state = true; | ||
| 259 | dirty.stencil_state = true; | ||
| 260 | dirty.polygon_offset = true; | ||
| 261 | dirty.color_mask = true; | ||
| 262 | } | ||
| 263 | |||
| 240 | private: | 264 | private: |
| 241 | static OpenGLState cur_state; | 265 | static OpenGLState cur_state; |
| 242 | 266 | ||
| 243 | // Workaround for sRGB problems caused by QT not supporting srgb output | 267 | // Workaround for sRGB problems caused by QT not supporting srgb output |
| 244 | static bool s_rgb_used; | 268 | static bool s_rgb_used; |
| 269 | struct { | ||
| 270 | bool blend_state; | ||
| 271 | bool stencil_state; | ||
| 272 | bool viewport_state; | ||
| 273 | bool polygon_offset; | ||
| 274 | bool color_mask; | ||
| 275 | } dirty{}; | ||
| 245 | }; | 276 | }; |
| 246 | 277 | ||
| 247 | } // namespace OpenGL | 278 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b1f6bc7c2..8fcd39a69 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -485,11 +485,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 485 | const auto& dst_params{dst_view->GetSurfaceParams()}; | 485 | const auto& dst_params{dst_view->GetSurfaceParams()}; |
| 486 | 486 | ||
| 487 | OpenGLState prev_state{OpenGLState::GetCurState()}; | 487 | OpenGLState prev_state{OpenGLState::GetCurState()}; |
| 488 | SCOPE_EXIT({ prev_state.Apply(); }); | 488 | SCOPE_EXIT({ |
| 489 | prev_state.AllDirty(); | ||
| 490 | prev_state.Apply(); | ||
| 491 | }); | ||
| 489 | 492 | ||
| 490 | OpenGLState state; | 493 | OpenGLState state; |
| 491 | state.draw.read_framebuffer = src_framebuffer.handle; | 494 | state.draw.read_framebuffer = src_framebuffer.handle; |
| 492 | state.draw.draw_framebuffer = dst_framebuffer.handle; | 495 | state.draw.draw_framebuffer = dst_framebuffer.handle; |
| 496 | state.AllDirty(); | ||
| 493 | state.Apply(); | 497 | state.Apply(); |
| 494 | 498 | ||
| 495 | u32 buffers{}; | 499 | u32 buffers{}; |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 9ecdddb0d..a05cef3b9 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -108,6 +108,7 @@ void RendererOpenGL::SwapBuffers( | |||
| 108 | 108 | ||
| 109 | // Maintain the rasterizer's state as a priority | 109 | // Maintain the rasterizer's state as a priority |
| 110 | OpenGLState prev_state = OpenGLState::GetCurState(); | 110 | OpenGLState prev_state = OpenGLState::GetCurState(); |
| 111 | state.AllDirty(); | ||
| 111 | state.Apply(); | 112 | state.Apply(); |
| 112 | 113 | ||
| 113 | if (framebuffer) { | 114 | if (framebuffer) { |
| @@ -140,6 +141,7 @@ void RendererOpenGL::SwapBuffers( | |||
| 140 | system.GetPerfStats().BeginSystemFrame(); | 141 | system.GetPerfStats().BeginSystemFrame(); |
| 141 | 142 | ||
| 142 | // Restore the rasterizer state | 143 | // Restore the rasterizer state |
| 144 | prev_state.AllDirty(); | ||
| 143 | prev_state.Apply(); | 145 | prev_state.Apply(); |
| 144 | } | 146 | } |
| 145 | 147 | ||
| @@ -206,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 206 | // Link shaders and get variable locations | 208 | // Link shaders and get variable locations |
| 207 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); | 209 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); |
| 208 | state.draw.shader_program = shader.handle; | 210 | state.draw.shader_program = shader.handle; |
| 211 | state.AllDirty(); | ||
| 209 | state.Apply(); | 212 | state.Apply(); |
| 210 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); | 213 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); |
| 211 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); | 214 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); |
| @@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, | |||
| 338 | // Work around brightness problems in SMO by enabling sRGB in the final output | 341 | // Work around brightness problems in SMO by enabling sRGB in the final output |
| 339 | // if it has been used in the frame. Needed because of this bug in Qt: QTBUG-50987 | 342 | // if it has been used in the frame. Needed because of this bug in Qt: QTBUG-50987 |
| 340 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); | 343 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); |
| 344 | state.AllDirty(); | ||
| 341 | state.Apply(); | 345 | state.Apply(); |
| 342 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); | 346 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); |
| 343 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 347 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| 344 | // Restore default state | 348 | // Restore default state |
| 345 | state.framebuffer_srgb.enabled = false; | 349 | state.framebuffer_srgb.enabled = false; |
| 346 | state.texture_units[0].texture = 0; | 350 | state.texture_units[0].texture = 0; |
| 351 | state.AllDirty(); | ||
| 347 | state.Apply(); | 352 | state.Apply(); |
| 348 | // Clear sRGB state for the next frame | 353 | // Clear sRGB state for the next frame |
| 349 | OpenGLState::ClearsRGBUsed(); | 354 | OpenGLState::ClearsRGBUsed(); |
| @@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 388 | GLuint old_read_fb = state.draw.read_framebuffer; | 393 | GLuint old_read_fb = state.draw.read_framebuffer; |
| 389 | GLuint old_draw_fb = state.draw.draw_framebuffer; | 394 | GLuint old_draw_fb = state.draw.draw_framebuffer; |
| 390 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; | 395 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; |
| 396 | state.AllDirty(); | ||
| 391 | state.Apply(); | 397 | state.Apply(); |
| 392 | 398 | ||
| 393 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; | 399 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; |
| @@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 407 | screenshot_framebuffer.Release(); | 413 | screenshot_framebuffer.Release(); |
| 408 | state.draw.read_framebuffer = old_read_fb; | 414 | state.draw.read_framebuffer = old_read_fb; |
| 409 | state.draw.draw_framebuffer = old_draw_fb; | 415 | state.draw.draw_framebuffer = old_draw_fb; |
| 416 | state.AllDirty(); | ||
| 410 | state.Apply(); | 417 | state.Apply(); |
| 411 | glDeleteRenderbuffers(1, &renderbuffer); | 418 | glDeleteRenderbuffers(1, &renderbuffer); |
| 412 | 419 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7f9623c62..a3a3770a7 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -116,10 +116,10 @@ public: | |||
| 116 | std::lock_guard lock{mutex}; | 116 | std::lock_guard lock{mutex}; |
| 117 | auto& maxwell3d = system.GPU().Maxwell3D(); | 117 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 118 | 118 | ||
| 119 | if (!maxwell3d.dirty_flags.zeta_buffer) { | 119 | if (!maxwell3d.dirty.depth_buffer) { |
| 120 | return depth_buffer.view; | 120 | return depth_buffer.view; |
| 121 | } | 121 | } |
| 122 | maxwell3d.dirty_flags.zeta_buffer = false; | 122 | maxwell3d.dirty.depth_buffer = false; |
| 123 | 123 | ||
| 124 | const auto& regs{maxwell3d.regs}; | 124 | const auto& regs{maxwell3d.regs}; |
| 125 | const auto gpu_addr{regs.zeta.Address()}; | 125 | const auto gpu_addr{regs.zeta.Address()}; |
| @@ -145,10 +145,10 @@ public: | |||
| 145 | std::lock_guard lock{mutex}; | 145 | std::lock_guard lock{mutex}; |
| 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); |
| 147 | auto& maxwell3d = system.GPU().Maxwell3D(); | 147 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 148 | if (!maxwell3d.dirty_flags.color_buffer[index]) { | 148 | if (!maxwell3d.dirty.render_target[index]) { |
| 149 | return render_targets[index].view; | 149 | return render_targets[index].view; |
| 150 | } | 150 | } |
| 151 | maxwell3d.dirty_flags.color_buffer.reset(index); | 151 | maxwell3d.dirty.render_target[index] = false; |
| 152 | 152 | ||
| 153 | const auto& regs{maxwell3d.regs}; | 153 | const auto& regs{maxwell3d.regs}; |
| 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || |
| @@ -274,10 +274,11 @@ protected: | |||
| 274 | auto& maxwell3d = system.GPU().Maxwell3D(); | 274 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 275 | const u32 index = surface->GetRenderTarget(); | 275 | const u32 index = surface->GetRenderTarget(); |
| 276 | if (index == DEPTH_RT) { | 276 | if (index == DEPTH_RT) { |
| 277 | maxwell3d.dirty_flags.zeta_buffer = true; | 277 | maxwell3d.dirty.depth_buffer = true; |
| 278 | } else { | 278 | } else { |
| 279 | maxwell3d.dirty_flags.color_buffer.set(index, true); | 279 | maxwell3d.dirty.render_target[index] = true; |
| 280 | } | 280 | } |
| 281 | maxwell3d.dirty.render_settings = true; | ||
| 281 | } | 282 | } |
| 282 | 283 | ||
| 283 | void Register(TSurface surface) { | 284 | void Register(TSurface surface) { |