summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2018-11-26 18:33:22 -0500
committer: GitHub, 2018-11-26 18:33:22 -0500
commit: 67a154e23da149da29e6bd04ce2fb95f3eb7675a (patch)
tree: 914026c2130ca51f929127e470608109890ba0a9 /src
parent: GPU States: Implement Polygon Offset. This is used in SMO all the time. (#1784) (diff)
parent: gl_rasterizer: Skip VB upload if the state is clean. (diff)
download: yuzu-67a154e23da149da29e6bd04ce2fb95f3eb7675a.tar.gz
yuzu-67a154e23da149da29e6bd04ce2fb95f3eb7675a.tar.xz
yuzu-67a154e23da149da29e6bd04ce2fb95f3eb7675a.zip
Merge pull request #1723 from degasus/dirty_flags
gl_rasterizer: Skip VB upload if the state is clean.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/command_processor.cpp | 3
-rw-r--r-- src/video_core/engines/fermi_2d.cpp | 5
-rw-r--r-- src/video_core/engines/kepler_memory.cpp | 3
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 16
-rw-r--r-- src/video_core/engines/maxwell_3d.h | 5
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp | 5
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 3
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 24
9 files changed, 60 insertions, 6 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 28e8c13aa..8b9c548cc 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB
34void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { 34void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
35 MICROPROFILE_SCOPE(ProcessCommandLists); 35 MICROPROFILE_SCOPE(ProcessCommandLists);
36 36
37 // On entering GPU code, assume all memory may be touched by the ARM core.
38 maxwell_3d->dirty_flags.OnMemoryWrite();
39
37 auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { 40 auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
38 LOG_TRACE(HW_GPU, 41 LOG_TRACE(HW_GPU,
39 "Processing method {:08X} on subchannel {} value " 42 "Processing method {:08X} on subchannel {} value "
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 74e44c7fe..8d0700d13 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,8 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
5#include "core/memory.h" 6#include "core/memory.h"
6#include "video_core/engines/fermi_2d.h" 7#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
7#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
8#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
9 11
@@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() {
47 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); 49 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
48 50
49 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { 51 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
52 // All copies here update the main memory, so mark all rasterizer states as invalid.
53 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
54
50 rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); 55 rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
51 // We have to invalidate the destination region to evict any outdated surfaces from the 56 // We have to invalidate the destination region to evict any outdated surfaces from the
52 // cache. We do this before actually writing the new data because the destination address 57 // cache. We do this before actually writing the new data because the destination address
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 585290d9f..2adbc9eaf 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -3,8 +3,10 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/core.h"
6#include "core/memory.h" 7#include "core/memory.h"
7#include "video_core/engines/kepler_memory.h" 8#include "video_core/engines/kepler_memory.h"
9#include "video_core/engines/maxwell_3d.h"
8#include "video_core/rasterizer_interface.h" 10#include "video_core/rasterizer_interface.h"
9 11
10namespace Tegra::Engines { 12namespace Tegra::Engines {
@@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) {
47 rasterizer.InvalidateRegion(dest_address, sizeof(u32)); 49 rasterizer.InvalidateRegion(dest_address, sizeof(u32));
48 50
49 Memory::Write32(dest_address, data); 51 Memory::Write32(dest_address, data);
52 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
50 53
51 state.write_offset++; 54 state.write_offset++;
52} 55}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2bc534be3..f0a5470b9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -135,10 +135,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
135 135
136 if (regs.reg_array[method] != value) { 136 if (regs.reg_array[method] != value) {
137 regs.reg_array[method] = value; 137 regs.reg_array[method] = value;
138 // Vertex format
138 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && 139 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
139 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { 140 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
140 dirty_flags.vertex_attrib_format = true; 141 dirty_flags.vertex_attrib_format = true;
141 } 142 }
143
144 // Vertex buffer
145 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
146 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
147 dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
148 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
149 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
150 dirty_flags.vertex_array |=
151 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
152 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
153 method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
154 dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays));
155 }
142 } 156 }
143 157
144 switch (method) { 158 switch (method) {
@@ -270,6 +284,7 @@ void Maxwell3D::ProcessQueryGet() {
270 query_result.timestamp = CoreTiming::GetTicks(); 284 query_result.timestamp = CoreTiming::GetTicks();
271 Memory::WriteBlock(*address, &query_result, sizeof(query_result)); 285 Memory::WriteBlock(*address, &query_result, sizeof(query_result));
272 } 286 }
287 dirty_flags.OnMemoryWrite();
273 break; 288 break;
274 } 289 }
275 default: 290 default:
@@ -346,6 +361,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
346 memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 361 memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
347 362
348 Memory::Write32(*address, value); 363 Memory::Write32(*address, value);
364 dirty_flags.OnMemoryWrite();
349 365
350 // Increment the current buffer position. 366 // Increment the current buffer position.
351 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; 367 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 286cde201..e44a23135 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1061,6 +1061,11 @@ public:
1061 1061
1062 struct DirtyFlags { 1062 struct DirtyFlags {
1063 bool vertex_attrib_format = true; 1063 bool vertex_attrib_format = true;
1064 u32 vertex_array = 0xFFFFFFFF;
1065
1066 void OnMemoryWrite() {
1067 vertex_array = 0xFFFFFFFF;
1068 }
1064 }; 1069 };
1065 1070
1066 DirtyFlags dirty_flags; 1071 DirtyFlags dirty_flags;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index b8a78cf82..a34e884fe 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -2,7 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
5#include "core/memory.h" 6#include "core/memory.h"
7#include "video_core/engines/maxwell_3d.h"
6#include "video_core/engines/maxwell_dma.h" 8#include "video_core/engines/maxwell_dma.h"
7#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
8#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
@@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() {
54 return; 56 return;
55 } 57 }
56 58
59 // All copies here update the main memory, so mark all rasterizer states as invalid.
60 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
61
57 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
58 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
59 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, 64 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 075192c3f..46a6c0308 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s
76 return std::make_tuple(uploaded_ptr, uploaded_offset); 76 return std::make_tuple(uploaded_ptr, uploaded_offset);
77} 77}
78 78
79void OGLBufferCache::Map(std::size_t max_size) { 79bool OGLBufferCache::Map(std::size_t max_size) {
80 bool invalidate; 80 bool invalidate;
81 std::tie(buffer_ptr, buffer_offset_base, invalidate) = 81 std::tie(buffer_ptr, buffer_offset_base, invalidate) =
82 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); 82 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
@@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
85 if (invalidate) { 85 if (invalidate) {
86 InvalidateAll(); 86 InvalidateAll();
87 } 87 }
88 return invalidate;
88} 89}
89 90
90void OGLBufferCache::Unmap() { 91void OGLBufferCache::Unmap() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 91fca3f6c..c11acfb79 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -50,7 +50,7 @@ public:
50 /// Reserves memory to be used by host's CPU. Returns mapped address and offset. 50 /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
51 std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); 51 std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
52 52
53 void Map(std::size_t max_size); 53 bool Map(std::size_t max_size);
54 void Unmap(); 54 void Unmap();
55 55
56 GLuint GetHandle() const; 56 GLuint GetHandle() const;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 2d5e65f41..1f9acda36 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -176,15 +176,25 @@ void RasterizerOpenGL::SetupVertexFormat() {
176 } 176 }
177 state.draw.vertex_array = VAO.handle; 177 state.draw.vertex_array = VAO.handle;
178 state.ApplyVertexBufferState(); 178 state.ApplyVertexBufferState();
179
180 // Rebinding the VAO invalidates the vertex buffer bindings.
181 gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
179} 182}
180 183
181void RasterizerOpenGL::SetupVertexBuffer() { 184void RasterizerOpenGL::SetupVertexBuffer() {
182 MICROPROFILE_SCOPE(OpenGL_VB); 185 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
183 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
184 const auto& regs = gpu.regs; 186 const auto& regs = gpu.regs;
185 187
188 if (!gpu.dirty_flags.vertex_array)
189 return;
190
191 MICROPROFILE_SCOPE(OpenGL_VB);
192
186 // Upload all guest vertex arrays sequentially to our buffer 193 // Upload all guest vertex arrays sequentially to our buffer
187 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 194 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
195 if (~gpu.dirty_flags.vertex_array & (1u << index))
196 continue;
197
188 const auto& vertex_array = regs.vertex_array[index]; 198 const auto& vertex_array = regs.vertex_array[index];
189 if (!vertex_array.IsEnabled()) 199 if (!vertex_array.IsEnabled())
190 continue; 200 continue;
@@ -211,6 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
211 221
212 // Implicit set by glBindVertexBuffer. Stupid glstate handling... 222 // Implicit set by glBindVertexBuffer. Stupid glstate handling...
213 state.draw.vertex_buffer = buffer_cache.GetHandle(); 223 state.draw.vertex_buffer = buffer_cache.GetHandle();
224
225 gpu.dirty_flags.vertex_array = 0;
214} 226}
215 227
216DrawParameters RasterizerOpenGL::SetupDraw() { 228DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -600,7 +612,7 @@ void RasterizerOpenGL::DrawArrays() {
600 return; 612 return;
601 613
602 MICROPROFILE_SCOPE(OpenGL_Drawing); 614 MICROPROFILE_SCOPE(OpenGL_Drawing);
603 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 615 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
604 const auto& regs = gpu.regs; 616 const auto& regs = gpu.regs;
605 617
606 ScopeAcquireGLContext acquire_context{emu_window}; 618 ScopeAcquireGLContext acquire_context{emu_window};
@@ -653,7 +665,11 @@ void RasterizerOpenGL::DrawArrays() {
653 // Add space for at least 18 constant buffers 665 // Add space for at least 18 constant buffers
654 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); 666 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
655 667
656 buffer_cache.Map(buffer_size); 668 bool invalidate = buffer_cache.Map(buffer_size);
669 if (invalidate) {
670 // As all cached buffers are invalidated, we need to recheck their state.
671 gpu.dirty_flags.vertex_attrib_format = 0xFFFFFFFF;
672 }
657 673
658 SetupVertexFormat(); 674 SetupVertexFormat();
659 SetupVertexBuffer(); 675 SetupVertexBuffer();