diff options
| author | 2019-07-10 15:38:31 -0400 | |
|---|---|---|
| committer | 2019-07-17 17:29:49 -0400 | |
| commit | f2e7b29c14e0207e0476299d69dac7ed4c213c74 (patch) | |
| tree | 670339fbdcb8b1cfa8e14a5c4e19cc03199a595f /src | |
| parent | Merge pull request #2740 from lioncash/bra (diff) | |
| download | yuzu-f2e7b29c14e0207e0476299d69dac7ed4c213c74.tar.gz yuzu-f2e7b29c14e0207e0476299d69dac7ed4c213c74.tar.xz yuzu-f2e7b29c14e0207e0476299d69dac7ed4c213c74.zip | |
Maxwell3D: Rework the dirty system to be more consistent and scalable
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 136 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 74 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 53 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 17 |
10 files changed, 211 insertions, 80 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3175579cc..bd036cbe8 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() { | |||
| 22 | MICROPROFILE_SCOPE(DispatchCalls); | 22 | MICROPROFILE_SCOPE(DispatchCalls); |
| 23 | 23 | ||
| 24 | // On entering GPU code, assume all memory may be touched by the ARM core. | 24 | // On entering GPU code, assume all memory may be touched by the ARM core. |
| 25 | gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); | 25 | gpu.Maxwell3D().dirty.OnMemoryWrite(); |
| 26 | 26 | ||
| 27 | dma_pushbuffer_subindex = 0; | 27 | dma_pushbuffer_subindex = 0; |
| 28 | 28 | ||
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7404a8163..e3d5fb8a9 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 37 | const bool is_last_call = method_call.IsLastCall(); | 37 | const bool is_last_call = method_call.IsLastCall(); |
| 38 | upload_state.ProcessData(method_call.argument, is_last_call); | 38 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 39 | if (is_last_call) { | 39 | if (is_last_call) { |
| 40 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 40 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 41 | } | 41 | } |
| 42 | break; | 42 | break; |
| 43 | } | 43 | } |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 0561f676c..44279de00 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 34 | const bool is_last_call = method_call.IsLastCall(); | 34 | const bool is_last_call = method_call.IsLastCall(); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | 35 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 36 | if (is_last_call) { | 36 | if (is_last_call) { |
| 37 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 37 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 38 | } | 38 | } |
| 39 | break; | 39 | break; |
| 40 | } | 40 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 8755b8af4..a55915fd3 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste | |||
| 22 | MemoryManager& memory_manager) | 22 | MemoryManager& memory_manager) |
| 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, | 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, |
| 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { | 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { |
| 25 | InitDirtySettings(); | ||
| 25 | InitializeRegisterDefaults(); | 26 | InitializeRegisterDefaults(); |
| 26 | } | 27 | } |
| 27 | 28 | ||
| @@ -86,6 +87,80 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 86 | regs.rt_separate_frag_data = 1; | 87 | regs.rt_separate_frag_data = 1; |
| 87 | } | 88 | } |
| 88 | 89 | ||
| 90 | #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) | ||
| 91 | |||
| 92 | void Maxwell3D::InitDirtySettings() { | ||
| 93 | const auto set_block = [this](const u32 start, const u32 range, const u8 position) { | ||
| 94 | const u32 end = start + range; | ||
| 95 | for (std::size_t i = start; i < end; i++) { | ||
| 96 | dirty_pointers[i] = position; | ||
| 97 | } | ||
| 98 | }; | ||
| 99 | for (std::size_t i = 0; i < DirtyRegs::NUM_REGS; i++) { | ||
| 100 | dirty.regs[i] = true; | ||
| 101 | } | ||
| 102 | |||
| 103 | // Init Render Targets | ||
| 104 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | ||
| 105 | constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); | ||
| 106 | constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; | ||
| 107 | u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); | ||
| 108 | for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { | ||
| 109 | set_block(rt_reg, registers_per_rt, rt_dirty_reg); | ||
| 110 | rt_dirty_reg++; | ||
| 111 | } | ||
| 112 | constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); | ||
| 113 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; | ||
| 114 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag; | ||
| 115 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag; | ||
| 116 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | ||
| 117 | constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta); | ||
| 118 | set_block(zeta_reg, registers_in_zeta, depth_buffer_flag); | ||
| 119 | |||
| 120 | // Init Vertex Arrays | ||
| 121 | constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); | ||
| 122 | constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); | ||
| 123 | constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; | ||
| 124 | u32 va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 125 | u32 vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 126 | for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; | ||
| 127 | vertex_reg += vertex_array_size) { | ||
| 128 | set_block(vertex_reg, 3, va_reg); | ||
| 129 | // The divisor concerns vertex array instances | ||
| 130 | dirty_pointers[vertex_reg + 3] = vi_reg; | ||
| 131 | va_reg++; | ||
| 132 | vi_reg++; | ||
| 133 | } | ||
| 134 | constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); | ||
| 135 | constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); | ||
| 136 | constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; | ||
| 137 | va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 138 | for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; | ||
| 139 | vertex_reg += vertex_limit_size) { | ||
| 140 | set_block(vertex_reg, vertex_limit_size, va_reg); | ||
| 141 | va_reg++; | ||
| 142 | } | ||
| 143 | constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); | ||
| 144 | constexpr u32 vertex_instance_size = | ||
| 145 | sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); | ||
| 146 | constexpr u32 vertex_instance_end = | ||
| 147 | vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; | ||
| 148 | vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 149 | for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; | ||
| 150 | vertex_reg += vertex_instance_size) { | ||
| 151 | set_block(vertex_reg, vertex_instance_size, vi_reg); | ||
| 152 | vi_reg++; | ||
| 153 | } | ||
| 154 | set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), | ||
| 155 | DIRTY_REGS_POS(vertex_attrib_format)); | ||
| 156 | |||
| 157 | // Init Shaders | ||
| 158 | constexpr u32 shader_registers_count = | ||
| 159 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 160 | set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, | ||
| 161 | DIRTY_REGS_POS(shaders)); | ||
| 162 | } | ||
| 163 | |||
| 89 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | 164 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { |
| 90 | // Reset the current macro. | 165 | // Reset the current macro. |
| 91 | executing_macro = 0; | 166 | executing_macro = 0; |
| @@ -143,49 +218,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 143 | 218 | ||
| 144 | if (regs.reg_array[method] != method_call.argument) { | 219 | if (regs.reg_array[method] != method_call.argument) { |
| 145 | regs.reg_array[method] = method_call.argument; | 220 | regs.reg_array[method] = method_call.argument; |
| 146 | // Color buffers | 221 | std::size_t dirty_reg = dirty_pointers[method]; |
| 147 | constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); | 222 | if (dirty_reg) { |
| 148 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | 223 | dirty.regs[dirty_reg] = true; |
| 149 | if (method >= first_rt_reg && | 224 | if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && |
| 150 | method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { | 225 | dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { |
| 151 | const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; | 226 | dirty.vertex_array_buffers = true; |
| 152 | dirty_flags.color_buffer.set(rt_index); | 227 | } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && |
| 153 | } | 228 | dirty_reg < DIRTY_REGS_POS(vertex_instances)) { |
| 154 | 229 | dirty.vertex_instances = true; | |
| 155 | // Zeta buffer | 230 | } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && |
| 156 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | 231 | dirty_reg < DIRTY_REGS_POS(render_settings)) { |
| 157 | if (method == MAXWELL3D_REG_INDEX(zeta_enable) || | 232 | dirty.render_settings = true; |
| 158 | method == MAXWELL3D_REG_INDEX(zeta_width) || | 233 | } |
| 159 | method == MAXWELL3D_REG_INDEX(zeta_height) || | ||
| 160 | (method >= MAXWELL3D_REG_INDEX(zeta) && | ||
| 161 | method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { | ||
| 162 | dirty_flags.zeta_buffer = true; | ||
| 163 | } | ||
| 164 | |||
| 165 | // Shader | ||
| 166 | constexpr u32 shader_registers_count = | ||
| 167 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 168 | if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && | ||
| 169 | method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { | ||
| 170 | dirty_flags.shaders = true; | ||
| 171 | } | ||
| 172 | |||
| 173 | // Vertex format | ||
| 174 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | ||
| 175 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | ||
| 176 | dirty_flags.vertex_attrib_format = true; | ||
| 177 | } | ||
| 178 | |||
| 179 | // Vertex buffer | ||
| 180 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 181 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) { | ||
| 182 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 183 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 184 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) { | ||
| 185 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 186 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 187 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) { | ||
| 188 | dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 189 | } | 234 | } |
| 190 | } | 235 | } |
| 191 | 236 | ||
| @@ -261,7 +306,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 261 | const bool is_last_call = method_call.IsLastCall(); | 306 | const bool is_last_call = method_call.IsLastCall(); |
| 262 | upload_state.ProcessData(method_call.argument, is_last_call); | 307 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 263 | if (is_last_call) { | 308 | if (is_last_call) { |
| 264 | dirty_flags.OnMemoryWrite(); | 309 | dirty.OnMemoryWrite(); |
| 265 | } | 310 | } |
| 266 | break; | 311 | break; |
| 267 | } | 312 | } |
| @@ -333,7 +378,6 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 333 | query_result.timestamp = system.CoreTiming().GetTicks(); | 378 | query_result.timestamp = system.CoreTiming().GetTicks(); |
| 334 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | 379 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); |
| 335 | } | 380 | } |
| 336 | dirty_flags.OnMemoryWrite(); | ||
| 337 | break; | 381 | break; |
| 338 | } | 382 | } |
| 339 | default: | 383 | default: |
| @@ -418,8 +462,6 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 418 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | 462 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); |
| 419 | memory_manager.Write<u32>(address, value); | 463 | memory_manager.Write<u32>(address, value); |
| 420 | 464 | ||
| 421 | dirty_flags.OnMemoryWrite(); | ||
| 422 | |||
| 423 | // Increment the current buffer position. | 465 | // Increment the current buffer position. |
| 424 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | 466 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; |
| 425 | } | 467 | } |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 8d15c8a48..84e6ca145 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1124,23 +1124,73 @@ public: | |||
| 1124 | 1124 | ||
| 1125 | State state{}; | 1125 | State state{}; |
| 1126 | 1126 | ||
| 1127 | struct DirtyFlags { | 1127 | struct DirtyRegs { |
| 1128 | std::bitset<8> color_buffer{0xFF}; | 1128 | static constexpr std::size_t NUM_REGS = 256; |
| 1129 | std::bitset<32> vertex_array{0xFFFFFFFF}; | 1129 | union { |
| 1130 | struct { | ||
| 1131 | bool null_dirty; | ||
| 1132 | // Vertex Attributes | ||
| 1133 | bool vertex_attrib_format; | ||
| 1134 | // Vertex Arrays | ||
| 1135 | std::array<bool, 32> vertex_array; | ||
| 1136 | |||
| 1137 | bool vertex_array_buffers; | ||
| 1138 | // Vertex Instances | ||
| 1139 | std::array<bool, 32> vertex_instance; | ||
| 1140 | |||
| 1141 | bool vertex_instances; | ||
| 1142 | // Render Targets | ||
| 1143 | std::array<bool, 8> render_target; | ||
| 1144 | bool depth_buffer; | ||
| 1145 | |||
| 1146 | bool render_settings; | ||
| 1147 | // Shaders | ||
| 1148 | bool shaders; | ||
| 1149 | // State | ||
| 1150 | bool viewport; | ||
| 1151 | bool clip_enabled; | ||
| 1152 | bool clip_coefficient; | ||
| 1153 | bool cull_mode; | ||
| 1154 | bool primitive_restart; | ||
| 1155 | bool depth_test; | ||
| 1156 | bool stencil_test; | ||
| 1157 | bool blend_state; | ||
| 1158 | bool logic_op; | ||
| 1159 | bool fragment_color_clamp; | ||
| 1160 | bool multi_sample; | ||
| 1161 | bool scissor_test; | ||
| 1162 | bool transform_feedback; | ||
| 1163 | bool point; | ||
| 1164 | bool color_mask; | ||
| 1165 | bool polygon_offset; | ||
| 1166 | bool alpha_test; | ||
| 1167 | |||
| 1168 | bool memory_general; | ||
| 1169 | }; | ||
| 1170 | std::array<bool, NUM_REGS> regs; | ||
| 1171 | }; | ||
| 1172 | |||
| 1173 | void ResetVertexArrays() { | ||
| 1174 | std::fill(vertex_array.begin(), vertex_array.end(), true); | ||
| 1175 | vertex_array_buffers = true; | ||
| 1176 | } | ||
| 1130 | 1177 | ||
| 1131 | bool vertex_attrib_format = true; | 1178 | void ResetRenderTargets() { |
| 1132 | bool zeta_buffer = true; | 1179 | depth_buffer = true; |
| 1133 | bool shaders = true; | 1180 | std::fill(render_target.begin(), render_target.end(), true); |
| 1181 | render_settings = true; | ||
| 1182 | } | ||
| 1134 | 1183 | ||
| 1135 | void OnMemoryWrite() { | 1184 | void OnMemoryWrite() { |
| 1136 | zeta_buffer = true; | ||
| 1137 | shaders = true; | 1185 | shaders = true; |
| 1138 | color_buffer.set(); | 1186 | memory_general = true; |
| 1139 | vertex_array.set(); | 1187 | ResetRenderTargets(); |
| 1188 | ResetVertexArrays(); | ||
| 1140 | } | 1189 | } |
| 1141 | }; | ||
| 1142 | 1190 | ||
| 1143 | DirtyFlags dirty_flags; | 1191 | } dirty{}; |
| 1192 | |||
| 1193 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1144 | 1194 | ||
| 1145 | /// Reads a register value located at the input method address | 1195 | /// Reads a register value located at the input method address |
| 1146 | u32 GetRegisterValue(u32 method) const; | 1196 | u32 GetRegisterValue(u32 method) const; |
| @@ -1200,6 +1250,8 @@ private: | |||
| 1200 | /// Retrieves information about a specific TSC entry from the TSC buffer. | 1250 | /// Retrieves information about a specific TSC entry from the TSC buffer. |
| 1201 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; | 1251 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; |
| 1202 | 1252 | ||
| 1253 | void InitDirtySettings(); | ||
| 1254 | |||
| 1203 | /** | 1255 | /** |
| 1204 | * Call a macro on this engine. | 1256 | * Call a macro on this engine. |
| 1205 | * @param method Method to call | 1257 | * @param method Method to call |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0..b5f57e534 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. | 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. |
| 61 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 61 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 62 | 62 | ||
| 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0bb5c068c..c2b5cbff4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -124,10 +124,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 124 | auto& gpu = system.GPU().Maxwell3D(); | 124 | auto& gpu = system.GPU().Maxwell3D(); |
| 125 | const auto& regs = gpu.regs; | 125 | const auto& regs = gpu.regs; |
| 126 | 126 | ||
| 127 | if (!gpu.dirty_flags.vertex_attrib_format) { | 127 | if (!gpu.dirty.vertex_attrib_format) { |
| 128 | return state.draw.vertex_array; | 128 | return state.draw.vertex_array; |
| 129 | } | 129 | } |
| 130 | gpu.dirty_flags.vertex_attrib_format = false; | 130 | gpu.dirty.vertex_attrib_format = false; |
| 131 | 131 | ||
| 132 | MICROPROFILE_SCOPE(OpenGL_VAO); | 132 | MICROPROFILE_SCOPE(OpenGL_VAO); |
| 133 | 133 | ||
| @@ -181,7 +181,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 181 | } | 181 | } |
| 182 | 182 | ||
| 183 | // Rebinding the VAO invalidates the vertex buffer bindings. | 183 | // Rebinding the VAO invalidates the vertex buffer bindings. |
| 184 | gpu.dirty_flags.vertex_array.set(); | 184 | gpu.dirty.ResetVertexArrays(); |
| 185 | 185 | ||
| 186 | state.draw.vertex_array = vao_entry.handle; | 186 | state.draw.vertex_array = vao_entry.handle; |
| 187 | return vao_entry.handle; | 187 | return vao_entry.handle; |
| @@ -189,17 +189,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 189 | 189 | ||
| 190 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | 190 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { |
| 191 | auto& gpu = system.GPU().Maxwell3D(); | 191 | auto& gpu = system.GPU().Maxwell3D(); |
| 192 | const auto& regs = gpu.regs; | 192 | if (!gpu.dirty.vertex_array_buffers) |
| 193 | |||
| 194 | if (gpu.dirty_flags.vertex_array.none()) | ||
| 195 | return; | 193 | return; |
| 194 | gpu.dirty.vertex_array_buffers = false; | ||
| 195 | |||
| 196 | const auto& regs = gpu.regs; | ||
| 196 | 197 | ||
| 197 | MICROPROFILE_SCOPE(OpenGL_VB); | 198 | MICROPROFILE_SCOPE(OpenGL_VB); |
| 198 | 199 | ||
| 199 | // Upload all guest vertex arrays sequentially to our buffer | 200 | // Upload all guest vertex arrays sequentially to our buffer |
| 200 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 201 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 201 | if (!gpu.dirty_flags.vertex_array[index]) | 202 | if (!gpu.dirty.vertex_array[index]) |
| 202 | continue; | 203 | continue; |
| 204 | gpu.dirty.vertex_array[index] = false; | ||
| 205 | gpu.dirty.vertex_instance[index] = false; | ||
| 203 | 206 | ||
| 204 | const auto& vertex_array = regs.vertex_array[index]; | 207 | const auto& vertex_array = regs.vertex_array[index]; |
| 205 | if (!vertex_array.IsEnabled()) | 208 | if (!vertex_array.IsEnabled()) |
| @@ -224,8 +227,32 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 224 | glVertexArrayBindingDivisor(vao, index, 0); | 227 | glVertexArrayBindingDivisor(vao, index, 0); |
| 225 | } | 228 | } |
| 226 | } | 229 | } |
| 230 | } | ||
| 227 | 231 | ||
| 228 | gpu.dirty_flags.vertex_array.reset(); | 232 | void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { |
| 233 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 234 | |||
| 235 | if (!gpu.dirty.vertex_instances) | ||
| 236 | return; | ||
| 237 | gpu.dirty.vertex_instances = false; | ||
| 238 | |||
| 239 | const auto& regs = gpu.regs; | ||
| 240 | // Upload all guest vertex arrays sequentially to our buffer | ||
| 241 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 242 | if (!gpu.dirty.vertex_instance[index]) | ||
| 243 | continue; | ||
| 244 | |||
| 245 | gpu.dirty.vertex_instance[index] = false; | ||
| 246 | |||
| 247 | if (regs.instanced_arrays.IsInstancingEnabled(index) && | ||
| 248 | regs.vertex_array[index].divisor != 0) { | ||
| 249 | // Enable vertex buffer instancing with the specified divisor. | ||
| 250 | glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor); | ||
| 251 | } else { | ||
| 252 | // Disable the vertex buffer instancing. | ||
| 253 | glVertexArrayBindingDivisor(vao, index, 0); | ||
| 254 | } | ||
| 255 | } | ||
| 229 | } | 256 | } |
| 230 | 257 | ||
| 231 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { | 258 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { |
| @@ -341,7 +368,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 341 | 368 | ||
| 342 | SyncClipEnabled(clip_distances); | 369 | SyncClipEnabled(clip_distances); |
| 343 | 370 | ||
| 344 | gpu.dirty_flags.shaders = false; | 371 | gpu.dirty.shaders = false; |
| 345 | } | 372 | } |
| 346 | 373 | ||
| 347 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 374 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| @@ -424,13 +451,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 424 | 451 | ||
| 425 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, | 452 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, |
| 426 | single_color_target}; | 453 | single_color_target}; |
| 427 | if (fb_config_state == current_framebuffer_config_state && | 454 | if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) { |
| 428 | gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { | ||
| 429 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or | 455 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or |
| 430 | // single color targets). This is done because the guest registers may not change but the | 456 | // single color targets). This is done because the guest registers may not change but the |
| 431 | // host framebuffer may contain different attachments | 457 | // host framebuffer may contain different attachments |
| 432 | return current_depth_stencil_usage; | 458 | return current_depth_stencil_usage; |
| 433 | } | 459 | } |
| 460 | gpu.dirty.render_settings = false; | ||
| 434 | current_framebuffer_config_state = fb_config_state; | 461 | current_framebuffer_config_state = fb_config_state; |
| 435 | 462 | ||
| 436 | texture_cache.GuardRenderTargets(true); | 463 | texture_cache.GuardRenderTargets(true); |
| @@ -661,6 +688,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 661 | 688 | ||
| 662 | // Upload vertex and index data. | 689 | // Upload vertex and index data. |
| 663 | SetupVertexBuffer(vao); | 690 | SetupVertexBuffer(vao); |
| 691 | SetupVertexInstances(vao); | ||
| 664 | const GLintptr index_buffer_offset = SetupIndexBuffer(); | 692 | const GLintptr index_buffer_offset = SetupIndexBuffer(); |
| 665 | 693 | ||
| 666 | // Setup draw parameters. It will automatically choose what glDraw* method to use. | 694 | // Setup draw parameters. It will automatically choose what glDraw* method to use. |
| @@ -687,7 +715,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 687 | 715 | ||
| 688 | if (invalidate) { | 716 | if (invalidate) { |
| 689 | // As all cached buffers are invalidated, we need to recheck their state. | 717 | // As all cached buffers are invalidated, we need to recheck their state. |
| 690 | gpu.dirty_flags.vertex_array.set(); | 718 | gpu.dirty.ResetVertexArrays(); |
| 691 | } | 719 | } |
| 692 | 720 | ||
| 693 | shader_program_manager->ApplyTo(state); | 721 | shader_program_manager->ApplyTo(state); |
| @@ -700,6 +728,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 700 | params.DispatchDraw(); | 728 | params.DispatchDraw(); |
| 701 | 729 | ||
| 702 | accelerate_draw = AccelDraw::Disabled; | 730 | accelerate_draw = AccelDraw::Disabled; |
| 731 | gpu.dirty.memory_general = false; | ||
| 703 | } | 732 | } |
| 704 | 733 | ||
| 705 | void RasterizerOpenGL::FlushAll() {} | 734 | void RasterizerOpenGL::FlushAll() {} |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 40b571d58..1f6ce4b81 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -216,6 +216,7 @@ private: | |||
| 216 | GLuint SetupVertexFormat(); | 216 | GLuint SetupVertexFormat(); |
| 217 | 217 | ||
| 218 | void SetupVertexBuffer(GLuint vao); | 218 | void SetupVertexBuffer(GLuint vao); |
| 219 | void SetupVertexInstances(GLuint vao); | ||
| 219 | 220 | ||
| 220 | GLintptr SetupIndexBuffer(); | 221 | GLintptr SetupIndexBuffer(); |
| 221 | 222 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 32dd9eae7..456ba0403 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -572,7 +572,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia | |||
| 572 | } | 572 | } |
| 573 | 573 | ||
| 574 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 574 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 575 | if (!system.GPU().Maxwell3D().dirty_flags.shaders) { | 575 | if (!system.GPU().Maxwell3D().dirty.shaders) { |
| 576 | return last_shaders[static_cast<std::size_t>(program)]; | 576 | return last_shaders[static_cast<std::size_t>(program)]; |
| 577 | } | 577 | } |
| 578 | 578 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7f9623c62..8225022a9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -116,10 +116,10 @@ public: | |||
| 116 | std::lock_guard lock{mutex}; | 116 | std::lock_guard lock{mutex}; |
| 117 | auto& maxwell3d = system.GPU().Maxwell3D(); | 117 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 118 | 118 | ||
| 119 | if (!maxwell3d.dirty_flags.zeta_buffer) { | 119 | if (!maxwell3d.dirty.depth_buffer) { |
| 120 | return depth_buffer.view; | 120 | return depth_buffer.view; |
| 121 | } | 121 | } |
| 122 | maxwell3d.dirty_flags.zeta_buffer = false; | 122 | maxwell3d.dirty.depth_buffer = false; |
| 123 | 123 | ||
| 124 | const auto& regs{maxwell3d.regs}; | 124 | const auto& regs{maxwell3d.regs}; |
| 125 | const auto gpu_addr{regs.zeta.Address()}; | 125 | const auto gpu_addr{regs.zeta.Address()}; |
| @@ -145,10 +145,10 @@ public: | |||
| 145 | std::lock_guard lock{mutex}; | 145 | std::lock_guard lock{mutex}; |
| 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); |
| 147 | auto& maxwell3d = system.GPU().Maxwell3D(); | 147 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 148 | if (!maxwell3d.dirty_flags.color_buffer[index]) { | 148 | if (!maxwell3d.dirty.render_target[index]) { |
| 149 | return render_targets[index].view; | 149 | return render_targets[index].view; |
| 150 | } | 150 | } |
| 151 | maxwell3d.dirty_flags.color_buffer.reset(index); | 151 | maxwell3d.dirty.render_target[index] = false; |
| 152 | 152 | ||
| 153 | const auto& regs{maxwell3d.regs}; | 153 | const auto& regs{maxwell3d.regs}; |
| 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || |
| @@ -272,12 +272,19 @@ protected: | |||
| 272 | 272 | ||
| 273 | void ManageRenderTargetUnregister(TSurface& surface) { | 273 | void ManageRenderTargetUnregister(TSurface& surface) { |
| 274 | auto& maxwell3d = system.GPU().Maxwell3D(); | 274 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 275 | <<<<<<< HEAD | ||
| 275 | const u32 index = surface->GetRenderTarget(); | 276 | const u32 index = surface->GetRenderTarget(); |
| 276 | if (index == DEPTH_RT) { | 277 | if (index == DEPTH_RT) { |
| 277 | maxwell3d.dirty_flags.zeta_buffer = true; | 278 | maxwell3d.dirty_flags.zeta_buffer = true; |
| 279 | ======= | ||
| 280 | u32 index = surface->GetRenderTarget(); | ||
| 281 | if (index == 8) { | ||
| 282 | maxwell3d.dirty.depth_buffer = true; | ||
| 283 | >>>>>>> Maxwell3D: Rework the dirty system to be more consistant and scaleable | ||
| 278 | } else { | 284 | } else { |
| 279 | maxwell3d.dirty_flags.color_buffer.set(index, true); | 285 | maxwell3d.dirty.render_target[index] = true; |
| 280 | } | 286 | } |
| 287 | maxwell3d.dirty.render_settings = true; | ||
| 281 | } | 288 | } |
| 282 | 289 | ||
| 283 | void Register(TSurface surface) { | 290 | void Register(TSurface surface) { |