diff options
| author | 2018-11-26 18:33:22 -0500 | |
|---|---|---|
| committer | 2018-11-26 18:33:22 -0500 | |
| commit | 67a154e23da149da29e6bd04ce2fb95f3eb7675a (patch) | |
| tree | 914026c2130ca51f929127e470608109890ba0a9 /src | |
| parent | GPU States: Implement Polygon Offset. This is used in SMO all the time. (#1784) (diff) | |
| parent | gl_rasterizer: Skip VB upload if the state is clean. (diff) | |
| download | yuzu-67a154e23da149da29e6bd04ce2fb95f3eb7675a.tar.gz yuzu-67a154e23da149da29e6bd04ce2fb95f3eb7675a.tar.xz yuzu-67a154e23da149da29e6bd04ce2fb95f3eb7675a.zip | |
Merge pull request #1723 from degasus/dirty_flags
gl_rasterizer: Skip VB upload if the state is clean.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/command_processor.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 5 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 24 |
9 files changed, 60 insertions, 6 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 28e8c13aa..8b9c548cc 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB | |||
| 34 | void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { | 34 | void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { |
| 35 | MICROPROFILE_SCOPE(ProcessCommandLists); | 35 | MICROPROFILE_SCOPE(ProcessCommandLists); |
| 36 | 36 | ||
| 37 | // On entering GPU code, assume all memory may be touched by the ARM core. | ||
| 38 | maxwell_3d->dirty_flags.OnMemoryWrite(); | ||
| 39 | |||
| 37 | auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { | 40 | auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { |
| 38 | LOG_TRACE(HW_GPU, | 41 | LOG_TRACE(HW_GPU, |
| 39 | "Processing method {:08X} on subchannel {} value " | 42 | "Processing method {:08X} on subchannel {} value " |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 74e44c7fe..8d0700d13 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -2,8 +2,10 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | ||
| 5 | #include "core/memory.h" | 6 | #include "core/memory.h" |
| 6 | #include "video_core/engines/fermi_2d.h" | 7 | #include "video_core/engines/fermi_2d.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 7 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 8 | #include "video_core/textures/decoders.h" | 10 | #include "video_core/textures/decoders.h" |
| 9 | 11 | ||
| @@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() { | |||
| 47 | u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); | 49 | u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); |
| 48 | 50 | ||
| 49 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { | 51 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { |
| 52 | // All copies here update the main memory, so mark all rasterizer states as invalid. | ||
| 53 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 54 | |||
| 50 | rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); | 55 | rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); |
| 51 | // We have to invalidate the destination region to evict any outdated surfaces from the | 56 | // We have to invalidate the destination region to evict any outdated surfaces from the |
| 52 | // cache. We do this before actually writing the new data because the destination address | 57 | // cache. We do this before actually writing the new data because the destination address |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 585290d9f..2adbc9eaf 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -3,8 +3,10 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "core/core.h" | ||
| 6 | #include "core/memory.h" | 7 | #include "core/memory.h" |
| 7 | #include "video_core/engines/kepler_memory.h" | 8 | #include "video_core/engines/kepler_memory.h" |
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 8 | #include "video_core/rasterizer_interface.h" | 10 | #include "video_core/rasterizer_interface.h" |
| 9 | 11 | ||
| 10 | namespace Tegra::Engines { | 12 | namespace Tegra::Engines { |
| @@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) { | |||
| 47 | rasterizer.InvalidateRegion(dest_address, sizeof(u32)); | 49 | rasterizer.InvalidateRegion(dest_address, sizeof(u32)); |
| 48 | 50 | ||
| 49 | Memory::Write32(dest_address, data); | 51 | Memory::Write32(dest_address, data); |
| 52 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 50 | 53 | ||
| 51 | state.write_offset++; | 54 | state.write_offset++; |
| 52 | } | 55 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2bc534be3..f0a5470b9 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -135,10 +135,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | |||
| 135 | 135 | ||
| 136 | if (regs.reg_array[method] != value) { | 136 | if (regs.reg_array[method] != value) { |
| 137 | regs.reg_array[method] = value; | 137 | regs.reg_array[method] = value; |
| 138 | // Vertex format | ||
| 138 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | 139 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && |
| 139 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | 140 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { |
| 140 | dirty_flags.vertex_attrib_format = true; | 141 | dirty_flags.vertex_attrib_format = true; |
| 141 | } | 142 | } |
| 143 | |||
| 144 | // Vertex buffer | ||
| 145 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 146 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { | ||
| 147 | dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 148 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 149 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { | ||
| 150 | dirty_flags.vertex_array |= | ||
| 151 | 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 152 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 153 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { | ||
| 154 | dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 155 | } | ||
| 142 | } | 156 | } |
| 143 | 157 | ||
| 144 | switch (method) { | 158 | switch (method) { |
| @@ -270,6 +284,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 270 | query_result.timestamp = CoreTiming::GetTicks(); | 284 | query_result.timestamp = CoreTiming::GetTicks(); |
| 271 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); | 285 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); |
| 272 | } | 286 | } |
| 287 | dirty_flags.OnMemoryWrite(); | ||
| 273 | break; | 288 | break; |
| 274 | } | 289 | } |
| 275 | default: | 290 | default: |
| @@ -346,6 +361,7 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 346 | memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); | 361 | memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); |
| 347 | 362 | ||
| 348 | Memory::Write32(*address, value); | 363 | Memory::Write32(*address, value); |
| 364 | dirty_flags.OnMemoryWrite(); | ||
| 349 | 365 | ||
| 350 | // Increment the current buffer position. | 366 | // Increment the current buffer position. |
| 351 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | 367 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 286cde201..e44a23135 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1061,6 +1061,11 @@ public: | |||
| 1061 | 1061 | ||
| 1062 | struct DirtyFlags { | 1062 | struct DirtyFlags { |
| 1063 | bool vertex_attrib_format = true; | 1063 | bool vertex_attrib_format = true; |
| 1064 | u32 vertex_array = 0xFFFFFFFF; | ||
| 1065 | |||
| 1066 | void OnMemoryWrite() { | ||
| 1067 | vertex_array = 0xFFFFFFFF; | ||
| 1068 | } | ||
| 1064 | }; | 1069 | }; |
| 1065 | 1070 | ||
| 1066 | DirtyFlags dirty_flags; | 1071 | DirtyFlags dirty_flags; |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index b8a78cf82..a34e884fe 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -2,7 +2,9 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | ||
| 5 | #include "core/memory.h" | 6 | #include "core/memory.h" |
| 7 | #include "video_core/engines/maxwell_3d.h" | ||
| 6 | #include "video_core/engines/maxwell_dma.h" | 8 | #include "video_core/engines/maxwell_dma.h" |
| 7 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 8 | #include "video_core/textures/decoders.h" | 10 | #include "video_core/textures/decoders.h" |
| @@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() { | |||
| 54 | return; | 56 | return; |
| 55 | } | 57 | } |
| 56 | 58 | ||
| 59 | // All copies here update the main memory, so mark all rasterizer states as invalid. | ||
| 60 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 61 | |||
| 57 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 62 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 58 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 63 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
| 59 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, | 64 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 075192c3f..46a6c0308 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s | |||
| 76 | return std::make_tuple(uploaded_ptr, uploaded_offset); | 76 | return std::make_tuple(uploaded_ptr, uploaded_offset); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | void OGLBufferCache::Map(std::size_t max_size) { | 79 | bool OGLBufferCache::Map(std::size_t max_size) { |
| 80 | bool invalidate; | 80 | bool invalidate; |
| 81 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = | 81 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = |
| 82 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | 82 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); |
| @@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) { | |||
| 85 | if (invalidate) { | 85 | if (invalidate) { |
| 86 | InvalidateAll(); | 86 | InvalidateAll(); |
| 87 | } | 87 | } |
| 88 | return invalidate; | ||
| 88 | } | 89 | } |
| 89 | 90 | ||
| 90 | void OGLBufferCache::Unmap() { | 91 | void OGLBufferCache::Unmap() { |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 91fca3f6c..c11acfb79 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -50,7 +50,7 @@ public: | |||
| 50 | /// Reserves memory to be used by host's CPU. Returns mapped address and offset. | 50 | /// Reserves memory to be used by host's CPU. Returns mapped address and offset. |
| 51 | std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); | 51 | std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); |
| 52 | 52 | ||
| 53 | void Map(std::size_t max_size); | 53 | bool Map(std::size_t max_size); |
| 54 | void Unmap(); | 54 | void Unmap(); |
| 55 | 55 | ||
| 56 | GLuint GetHandle() const; | 56 | GLuint GetHandle() const; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d5e65f41..1f9acda36 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -176,15 +176,25 @@ void RasterizerOpenGL::SetupVertexFormat() { | |||
| 176 | } | 176 | } |
| 177 | state.draw.vertex_array = VAO.handle; | 177 | state.draw.vertex_array = VAO.handle; |
| 178 | state.ApplyVertexBufferState(); | 178 | state.ApplyVertexBufferState(); |
| 179 | |||
| 180 | // Rebinding the VAO invalidates the vertex buffer bindings. | ||
| 181 | gpu.dirty_flags.vertex_array = 0xFFFFFFFF; | ||
| 179 | } | 182 | } |
| 180 | 183 | ||
| 181 | void RasterizerOpenGL::SetupVertexBuffer() { | 184 | void RasterizerOpenGL::SetupVertexBuffer() { |
| 182 | MICROPROFILE_SCOPE(OpenGL_VB); | 185 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 183 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||
| 184 | const auto& regs = gpu.regs; | 186 | const auto& regs = gpu.regs; |
| 185 | 187 | ||
| 188 | if (!gpu.dirty_flags.vertex_array) | ||
| 189 | return; | ||
| 190 | |||
| 191 | MICROPROFILE_SCOPE(OpenGL_VB); | ||
| 192 | |||
| 186 | // Upload all guest vertex arrays sequentially to our buffer | 193 | // Upload all guest vertex arrays sequentially to our buffer |
| 187 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 194 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 195 | if (~gpu.dirty_flags.vertex_array & (1u << index)) | ||
| 196 | continue; | ||
| 197 | |||
| 188 | const auto& vertex_array = regs.vertex_array[index]; | 198 | const auto& vertex_array = regs.vertex_array[index]; |
| 189 | if (!vertex_array.IsEnabled()) | 199 | if (!vertex_array.IsEnabled()) |
| 190 | continue; | 200 | continue; |
| @@ -211,6 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer() { | |||
| 211 | 221 | ||
| 212 | // Implicit set by glBindVertexBuffer. Stupid glstate handling... | 222 | // Implicit set by glBindVertexBuffer. Stupid glstate handling... |
| 213 | state.draw.vertex_buffer = buffer_cache.GetHandle(); | 223 | state.draw.vertex_buffer = buffer_cache.GetHandle(); |
| 224 | |||
| 225 | gpu.dirty_flags.vertex_array = 0; | ||
| 214 | } | 226 | } |
| 215 | 227 | ||
| 216 | DrawParameters RasterizerOpenGL::SetupDraw() { | 228 | DrawParameters RasterizerOpenGL::SetupDraw() { |
| @@ -600,7 +612,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 600 | return; | 612 | return; |
| 601 | 613 | ||
| 602 | MICROPROFILE_SCOPE(OpenGL_Drawing); | 614 | MICROPROFILE_SCOPE(OpenGL_Drawing); |
| 603 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 615 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 604 | const auto& regs = gpu.regs; | 616 | const auto& regs = gpu.regs; |
| 605 | 617 | ||
| 606 | ScopeAcquireGLContext acquire_context{emu_window}; | 618 | ScopeAcquireGLContext acquire_context{emu_window}; |
| @@ -653,7 +665,11 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 653 | // Add space for at least 18 constant buffers | 665 | // Add space for at least 18 constant buffers |
| 654 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); | 666 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); |
| 655 | 667 | ||
| 656 | buffer_cache.Map(buffer_size); | 668 | bool invalidate = buffer_cache.Map(buffer_size); |
| 669 | if (invalidate) { | ||
| 670 | // As all cached buffers are invalidated, mark every vertex array dirty so it is re-uploaded. | ||
| 671 | gpu.dirty_flags.vertex_array = 0xFFFFFFFF; | ||
| 672 | } | ||
| 657 | 673 | ||
| 658 | SetupVertexFormat(); | 674 | SetupVertexFormat(); |
| 659 | SetupVertexBuffer(); | 675 | SetupVertexBuffer(); |