summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/command_processor.cpp 3
-rw-r--r-- src/video_core/engines/fermi_2d.cpp 5
-rw-r--r-- src/video_core/engines/kepler_memory.cpp 3
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 16
-rw-r--r-- src/video_core/engines/maxwell_3d.h 5
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 5
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 3
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 24
9 files changed, 60 insertions, 6 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 28e8c13aa..8b9c548cc 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB
34void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { 34void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
35 MICROPROFILE_SCOPE(ProcessCommandLists); 35 MICROPROFILE_SCOPE(ProcessCommandLists);
36 36
37 // On entering GPU code, assume all memory may be touched by the ARM core.
38 maxwell_3d->dirty_flags.OnMemoryWrite();
39
37 auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { 40 auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
38 LOG_TRACE(HW_GPU, 41 LOG_TRACE(HW_GPU,
39 "Processing method {:08X} on subchannel {} value " 42 "Processing method {:08X} on subchannel {} value "
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 74e44c7fe..8d0700d13 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,8 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
5#include "core/memory.h" 6#include "core/memory.h"
6#include "video_core/engines/fermi_2d.h" 7#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
7#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
8#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
9 11
@@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() {
47 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); 49 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
48 50
49 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { 51 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
52 // All copies here update the main memory, so mark all rasterizer states as invalid.
53 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
54
50 rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); 55 rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
51 // We have to invalidate the destination region to evict any outdated surfaces from the 56 // We have to invalidate the destination region to evict any outdated surfaces from the
52 // cache. We do this before actually writing the new data because the destination address 57 // cache. We do this before actually writing the new data because the destination address
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 585290d9f..2adbc9eaf 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -3,8 +3,10 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/core.h"
6#include "core/memory.h" 7#include "core/memory.h"
7#include "video_core/engines/kepler_memory.h" 8#include "video_core/engines/kepler_memory.h"
9#include "video_core/engines/maxwell_3d.h"
8#include "video_core/rasterizer_interface.h" 10#include "video_core/rasterizer_interface.h"
9 11
10namespace Tegra::Engines { 12namespace Tegra::Engines {
@@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) {
47 rasterizer.InvalidateRegion(dest_address, sizeof(u32)); 49 rasterizer.InvalidateRegion(dest_address, sizeof(u32));
48 50
49 Memory::Write32(dest_address, data); 51 Memory::Write32(dest_address, data);
52 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
50 53
51 state.write_offset++; 54 state.write_offset++;
52} 55}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 6de07ea56..1772882b2 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -123,10 +123,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
123 123
124 if (regs.reg_array[method] != value) { 124 if (regs.reg_array[method] != value) {
125 regs.reg_array[method] = value; 125 regs.reg_array[method] = value;
126 // Vertex format
126 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && 127 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
127 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { 128 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
128 dirty_flags.vertex_attrib_format = true; 129 dirty_flags.vertex_attrib_format = true;
129 } 130 }
131
132 // Vertex buffer
133 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
134 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
135 dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
136 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
137 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
138 dirty_flags.vertex_array |=
139 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
140 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
141 method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
142 dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays));
143 }
130 } 144 }
131 145
132 switch (method) { 146 switch (method) {
@@ -258,6 +272,7 @@ void Maxwell3D::ProcessQueryGet() {
258 query_result.timestamp = CoreTiming::GetTicks(); 272 query_result.timestamp = CoreTiming::GetTicks();
259 Memory::WriteBlock(*address, &query_result, sizeof(query_result)); 273 Memory::WriteBlock(*address, &query_result, sizeof(query_result));
260 } 274 }
275 dirty_flags.OnMemoryWrite();
261 break; 276 break;
262 } 277 }
263 default: 278 default:
@@ -334,6 +349,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
334 memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 349 memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
335 350
336 Memory::Write32(*address, value); 351 Memory::Write32(*address, value);
352 dirty_flags.OnMemoryWrite();
337 353
338 // Increment the current buffer position. 354 // Increment the current buffer position.
339 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; 355 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 91ca57883..0848b7121 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1014,6 +1014,11 @@ public:
1014 1014
1015 struct DirtyFlags { 1015 struct DirtyFlags {
1016 bool vertex_attrib_format = true; 1016 bool vertex_attrib_format = true;
1017 u32 vertex_array = 0xFFFFFFFF;
1018
1019 void OnMemoryWrite() {
1020 vertex_array = 0xFFFFFFFF;
1021 }
1017 }; 1022 };
1018 1023
1019 DirtyFlags dirty_flags; 1024 DirtyFlags dirty_flags;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index b8a78cf82..a34e884fe 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -2,7 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
5#include "core/memory.h" 6#include "core/memory.h"
7#include "video_core/engines/maxwell_3d.h"
6#include "video_core/engines/maxwell_dma.h" 8#include "video_core/engines/maxwell_dma.h"
7#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
8#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
@@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() {
54 return; 56 return;
55 } 57 }
56 58
59 // All copies here update the main memory, so mark all rasterizer states as invalid.
60 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
61
57 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
58 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
59 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, 64 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 075192c3f..46a6c0308 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s
76 return std::make_tuple(uploaded_ptr, uploaded_offset); 76 return std::make_tuple(uploaded_ptr, uploaded_offset);
77} 77}
78 78
79void OGLBufferCache::Map(std::size_t max_size) { 79bool OGLBufferCache::Map(std::size_t max_size) {
80 bool invalidate; 80 bool invalidate;
81 std::tie(buffer_ptr, buffer_offset_base, invalidate) = 81 std::tie(buffer_ptr, buffer_offset_base, invalidate) =
82 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); 82 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
@@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
85 if (invalidate) { 85 if (invalidate) {
86 InvalidateAll(); 86 InvalidateAll();
87 } 87 }
88 return invalidate;
88} 89}
89 90
90void OGLBufferCache::Unmap() { 91void OGLBufferCache::Unmap() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 91fca3f6c..c11acfb79 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -50,7 +50,7 @@ public:
50 /// Reserves memory to be used by host's CPU. Returns mapped address and offset. 50 /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
51 std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); 51 std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
52 52
53 void Map(std::size_t max_size); 53 bool Map(std::size_t max_size);
54 void Unmap(); 54 void Unmap();
55 55
56 GLuint GetHandle() const; 56 GLuint GetHandle() const;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 54cc47a9b..cb0d0c16a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -183,15 +183,25 @@ void RasterizerOpenGL::SetupVertexFormat() {
183 } 183 }
184 state.draw.vertex_array = VAO.handle; 184 state.draw.vertex_array = VAO.handle;
185 state.ApplyVertexBufferState(); 185 state.ApplyVertexBufferState();
186
187 // Rebinding the VAO invalidates the vertex buffer bindings.
188 gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
186} 189}
187 190
188void RasterizerOpenGL::SetupVertexBuffer() { 191void RasterizerOpenGL::SetupVertexBuffer() {
189 MICROPROFILE_SCOPE(OpenGL_VB); 192 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
190 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
191 const auto& regs = gpu.regs; 193 const auto& regs = gpu.regs;
192 194
195 if (!gpu.dirty_flags.vertex_array)
196 return;
197
198 MICROPROFILE_SCOPE(OpenGL_VB);
199
193 // Upload all guest vertex arrays sequentially to our buffer 200 // Upload all guest vertex arrays sequentially to our buffer
194 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 201 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
202 if (~gpu.dirty_flags.vertex_array & (1u << index))
203 continue;
204
195 const auto& vertex_array = regs.vertex_array[index]; 205 const auto& vertex_array = regs.vertex_array[index];
196 if (!vertex_array.IsEnabled()) 206 if (!vertex_array.IsEnabled())
197 continue; 207 continue;
@@ -218,6 +228,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
218 228
219 // Implicit set by glBindVertexBuffer. Stupid glstate handling... 229 // Implicit set by glBindVertexBuffer. Stupid glstate handling...
220 state.draw.vertex_buffer = buffer_cache.GetHandle(); 230 state.draw.vertex_buffer = buffer_cache.GetHandle();
231
232 gpu.dirty_flags.vertex_array = 0;
221} 233}
222 234
223DrawParameters RasterizerOpenGL::SetupDraw() { 235DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -575,7 +587,7 @@ void RasterizerOpenGL::DrawArrays() {
575 return; 587 return;
576 588
577 MICROPROFILE_SCOPE(OpenGL_Drawing); 589 MICROPROFILE_SCOPE(OpenGL_Drawing);
578 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 590 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
579 const auto& regs = gpu.regs; 591 const auto& regs = gpu.regs;
580 592
581 ScopeAcquireGLContext acquire_context{emu_window}; 593 ScopeAcquireGLContext acquire_context{emu_window};
@@ -626,7 +638,11 @@ void RasterizerOpenGL::DrawArrays() {
626 // Add space for at least 18 constant buffers 638 // Add space for at least 18 constant buffers
627 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); 639 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
628 640
629 buffer_cache.Map(buffer_size); 641 bool invalidate = buffer_cache.Map(buffer_size);
642 if (invalidate) {
643 // As all cached buffers are invalidated, we need to recheck their state.
644 gpu.dirty_flags.vertex_attrib_format = 0xFFFFFFFF;
645 }
630 646
631 SetupVertexFormat(); 647 SetupVertexFormat();
632 SetupVertexBuffer(); 648 SetupVertexBuffer();