diff options
| -rw-r--r-- | src/video_core/command_processor.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 5 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 24 |
9 files changed, 60 insertions, 6 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 28e8c13aa..8b9c548cc 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB | |||
| 34 | void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { | 34 | void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { |
| 35 | MICROPROFILE_SCOPE(ProcessCommandLists); | 35 | MICROPROFILE_SCOPE(ProcessCommandLists); |
| 36 | 36 | ||
| 37 | // On entering GPU code, assume all memory may be touched by the ARM core. | ||
| 38 | maxwell_3d->dirty_flags.OnMemoryWrite(); | ||
| 39 | |||
| 37 | auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { | 40 | auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { |
| 38 | LOG_TRACE(HW_GPU, | 41 | LOG_TRACE(HW_GPU, |
| 39 | "Processing method {:08X} on subchannel {} value " | 42 | "Processing method {:08X} on subchannel {} value " |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 74e44c7fe..8d0700d13 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -2,8 +2,10 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | ||
| 5 | #include "core/memory.h" | 6 | #include "core/memory.h" |
| 6 | #include "video_core/engines/fermi_2d.h" | 7 | #include "video_core/engines/fermi_2d.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 7 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 8 | #include "video_core/textures/decoders.h" | 10 | #include "video_core/textures/decoders.h" |
| 9 | 11 | ||
| @@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() { | |||
| 47 | u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); | 49 | u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); |
| 48 | 50 | ||
| 49 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { | 51 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { |
| 52 | // All copies here update the main memory, so mark all rasterizer states as invalid. | ||
| 53 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 54 | |||
| 50 | rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); | 55 | rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); |
| 51 | // We have to invalidate the destination region to evict any outdated surfaces from the | 56 | // We have to invalidate the destination region to evict any outdated surfaces from the |
| 52 | // cache. We do this before actually writing the new data because the destination address | 57 | // cache. We do this before actually writing the new data because the destination address |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 585290d9f..2adbc9eaf 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -3,8 +3,10 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "core/core.h" | ||
| 6 | #include "core/memory.h" | 7 | #include "core/memory.h" |
| 7 | #include "video_core/engines/kepler_memory.h" | 8 | #include "video_core/engines/kepler_memory.h" |
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 8 | #include "video_core/rasterizer_interface.h" | 10 | #include "video_core/rasterizer_interface.h" |
| 9 | 11 | ||
| 10 | namespace Tegra::Engines { | 12 | namespace Tegra::Engines { |
| @@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) { | |||
| 47 | rasterizer.InvalidateRegion(dest_address, sizeof(u32)); | 49 | rasterizer.InvalidateRegion(dest_address, sizeof(u32)); |
| 48 | 50 | ||
| 49 | Memory::Write32(dest_address, data); | 51 | Memory::Write32(dest_address, data); |
| 52 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 50 | 53 | ||
| 51 | state.write_offset++; | 54 | state.write_offset++; |
| 52 | } | 55 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 6de07ea56..1772882b2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -123,10 +123,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | |||
| 123 | 123 | ||
| 124 | if (regs.reg_array[method] != value) { | 124 | if (regs.reg_array[method] != value) { |
| 125 | regs.reg_array[method] = value; | 125 | regs.reg_array[method] = value; |
| 126 | // Vertex format | ||
| 126 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | 127 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && |
| 127 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | 128 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { |
| 128 | dirty_flags.vertex_attrib_format = true; | 129 | dirty_flags.vertex_attrib_format = true; |
| 129 | } | 130 | } |
| 131 | |||
| 132 | // Vertex buffer | ||
| 133 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 134 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { | ||
| 135 | dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 136 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 137 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { | ||
| 138 | dirty_flags.vertex_array |= | ||
| 139 | 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 140 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 141 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { | ||
| 142 | dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 143 | } | ||
| 130 | } | 144 | } |
| 131 | 145 | ||
| 132 | switch (method) { | 146 | switch (method) { |
| @@ -258,6 +272,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 258 | query_result.timestamp = CoreTiming::GetTicks(); | 272 | query_result.timestamp = CoreTiming::GetTicks(); |
| 259 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); | 273 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); |
| 260 | } | 274 | } |
| 275 | dirty_flags.OnMemoryWrite(); | ||
| 261 | break; | 276 | break; |
| 262 | } | 277 | } |
| 263 | default: | 278 | default: |
| @@ -334,6 +349,7 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 334 | memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); | 349 | memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); |
| 335 | 350 | ||
| 336 | Memory::Write32(*address, value); | 351 | Memory::Write32(*address, value); |
| 352 | dirty_flags.OnMemoryWrite(); | ||
| 337 | 353 | ||
| 338 | // Increment the current buffer position. | 354 | // Increment the current buffer position. |
| 339 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | 355 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 91ca57883..0848b7121 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1014,6 +1014,11 @@ public: | |||
| 1014 | 1014 | ||
| 1015 | struct DirtyFlags { | 1015 | struct DirtyFlags { |
| 1016 | bool vertex_attrib_format = true; | 1016 | bool vertex_attrib_format = true; |
| 1017 | u32 vertex_array = 0xFFFFFFFF; | ||
| 1018 | |||
| 1019 | void OnMemoryWrite() { | ||
| 1020 | vertex_array = 0xFFFFFFFF; | ||
| 1021 | } | ||
| 1017 | }; | 1022 | }; |
| 1018 | 1023 | ||
| 1019 | DirtyFlags dirty_flags; | 1024 | DirtyFlags dirty_flags; |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index b8a78cf82..a34e884fe 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -2,7 +2,9 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | ||
| 5 | #include "core/memory.h" | 6 | #include "core/memory.h" |
| 7 | #include "video_core/engines/maxwell_3d.h" | ||
| 6 | #include "video_core/engines/maxwell_dma.h" | 8 | #include "video_core/engines/maxwell_dma.h" |
| 7 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 8 | #include "video_core/textures/decoders.h" | 10 | #include "video_core/textures/decoders.h" |
| @@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() { | |||
| 54 | return; | 56 | return; |
| 55 | } | 57 | } |
| 56 | 58 | ||
| 59 | // All copies here update the main memory, so mark all rasterizer states as invalid. | ||
| 60 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 61 | |||
| 57 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 62 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 58 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 63 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
| 59 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, | 64 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 075192c3f..46a6c0308 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s | |||
| 76 | return std::make_tuple(uploaded_ptr, uploaded_offset); | 76 | return std::make_tuple(uploaded_ptr, uploaded_offset); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | void OGLBufferCache::Map(std::size_t max_size) { | 79 | bool OGLBufferCache::Map(std::size_t max_size) { |
| 80 | bool invalidate; | 80 | bool invalidate; |
| 81 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = | 81 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = |
| 82 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | 82 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); |
| @@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) { | |||
| 85 | if (invalidate) { | 85 | if (invalidate) { |
| 86 | InvalidateAll(); | 86 | InvalidateAll(); |
| 87 | } | 87 | } |
| 88 | return invalidate; | ||
| 88 | } | 89 | } |
| 89 | 90 | ||
| 90 | void OGLBufferCache::Unmap() { | 91 | void OGLBufferCache::Unmap() { |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 91fca3f6c..c11acfb79 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -50,7 +50,7 @@ public: | |||
| 50 | /// Reserves memory to be used by host's CPU. Returns mapped address and offset. | 50 | /// Reserves memory to be used by host's CPU. Returns mapped address and offset. |
| 51 | std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); | 51 | std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); |
| 52 | 52 | ||
| 53 | void Map(std::size_t max_size); | 53 | bool Map(std::size_t max_size); |
| 54 | void Unmap(); | 54 | void Unmap(); |
| 55 | 55 | ||
| 56 | GLuint GetHandle() const; | 56 | GLuint GetHandle() const; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 54cc47a9b..cb0d0c16a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -183,15 +183,25 @@ void RasterizerOpenGL::SetupVertexFormat() { | |||
| 183 | } | 183 | } |
| 184 | state.draw.vertex_array = VAO.handle; | 184 | state.draw.vertex_array = VAO.handle; |
| 185 | state.ApplyVertexBufferState(); | 185 | state.ApplyVertexBufferState(); |
| 186 | |||
| 187 | // Rebinding the VAO invalidates the vertex buffer bindings. | ||
| 188 | gpu.dirty_flags.vertex_array = 0xFFFFFFFF; | ||
| 186 | } | 189 | } |
| 187 | 190 | ||
| 188 | void RasterizerOpenGL::SetupVertexBuffer() { | 191 | void RasterizerOpenGL::SetupVertexBuffer() { |
| 189 | MICROPROFILE_SCOPE(OpenGL_VB); | 192 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 190 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||
| 191 | const auto& regs = gpu.regs; | 193 | const auto& regs = gpu.regs; |
| 192 | 194 | ||
| 195 | if (!gpu.dirty_flags.vertex_array) | ||
| 196 | return; | ||
| 197 | |||
| 198 | MICROPROFILE_SCOPE(OpenGL_VB); | ||
| 199 | |||
| 193 | // Upload all guest vertex arrays sequentially to our buffer | 200 | // Upload all guest vertex arrays sequentially to our buffer |
| 194 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 201 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 202 | if (~gpu.dirty_flags.vertex_array & (1u << index)) | ||
| 203 | continue; | ||
| 204 | |||
| 195 | const auto& vertex_array = regs.vertex_array[index]; | 205 | const auto& vertex_array = regs.vertex_array[index]; |
| 196 | if (!vertex_array.IsEnabled()) | 206 | if (!vertex_array.IsEnabled()) |
| 197 | continue; | 207 | continue; |
| @@ -218,6 +228,8 @@ void RasterizerOpenGL::SetupVertexBuffer() { | |||
| 218 | 228 | ||
| 219 | // Implicit set by glBindVertexBuffer. Stupid glstate handling... | 229 | // Implicit set by glBindVertexBuffer. Stupid glstate handling... |
| 220 | state.draw.vertex_buffer = buffer_cache.GetHandle(); | 230 | state.draw.vertex_buffer = buffer_cache.GetHandle(); |
| 231 | |||
| 232 | gpu.dirty_flags.vertex_array = 0; | ||
| 221 | } | 233 | } |
| 222 | 234 | ||
| 223 | DrawParameters RasterizerOpenGL::SetupDraw() { | 235 | DrawParameters RasterizerOpenGL::SetupDraw() { |
| @@ -575,7 +587,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 575 | return; | 587 | return; |
| 576 | 588 | ||
| 577 | MICROPROFILE_SCOPE(OpenGL_Drawing); | 589 | MICROPROFILE_SCOPE(OpenGL_Drawing); |
| 578 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 590 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 579 | const auto& regs = gpu.regs; | 591 | const auto& regs = gpu.regs; |
| 580 | 592 | ||
| 581 | ScopeAcquireGLContext acquire_context{emu_window}; | 593 | ScopeAcquireGLContext acquire_context{emu_window}; |
| @@ -626,7 +638,11 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 626 | // Add space for at least 18 constant buffers | 638 | // Add space for at least 18 constant buffers |
| 627 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); | 639 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); |
| 628 | 640 | ||
| 629 | buffer_cache.Map(buffer_size); | 641 | bool invalidate = buffer_cache.Map(buffer_size); |
| 642 | if (invalidate) { | ||
| 643 | // Map() dropped every cached buffer, so mark all vertex arrays dirty to force a re-upload. | ||
| 644 | gpu.dirty_flags.vertex_array = 0xFFFFFFFF; | ||
| 645 | } | ||
| 630 | 646 | ||
| 631 | SetupVertexFormat(); | 647 | SetupVertexFormat(); |
| 632 | SetupVertexBuffer(); | 648 | SetupVertexBuffer(); |