summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2019-07-14 22:44:36 -0400
committer: GitHub, 2019-07-14 22:44:36 -0400
commit: b77a1ed67a56bea82a20d6c8e581073a709a2c90 (patch)
tree: b0a6a257a099f2ff18ee48c898c59671cc566de0 /src
parent: Merge pull request #2675 from ReinUsesLisp/opengl-buffer-cache (diff)
parent: Texture_Cache: Address Feedback (diff)
download: yuzu-b77a1ed67a56bea82a20d6c8e581073a709a2c90.tar.gz
download: yuzu-b77a1ed67a56bea82a20d6c8e581073a709a2c90.tar.xz
download: yuzu-b77a1ed67a56bea82a20d6c8e581073a709a2c90.zip
Merge pull request #2705 from FernandoS27/tex-cache-fixes
GPU: Fixes to Texture Cache and Include Microprofiles for GL State/BufferCopy/Macro Interpreter
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/macro_interpreter.cpp | 4
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp | 4
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp | 3
-rw-r--r-- src/video_core/texture_cache/surface_base.cpp | 9
-rw-r--r-- src/video_core/texture_cache/surface_base.h | 10
-rw-r--r-- src/video_core/texture_cache/surface_params.cpp | 13
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 37
7 files changed, 58 insertions, 22 deletions
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index c766ed692..9f59a2dc1 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -4,14 +4,18 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "common/microprofile.h"
7#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro_interpreter.h" 9#include "video_core/macro_interpreter.h"
9 10
11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
12
10namespace Tegra { 13namespace Tegra {
11 14
12MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} 15MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
13 16
14void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { 17void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
18 MICROPROFILE_SCOPE(MacroInterp);
15 Reset(); 19 Reset();
16 registers[1] = parameters[0]; 20 registers[1] = parameters[0];
17 this->parameters = std::move(parameters); 21 this->parameters = std::move(parameters);
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index d86e137ac..0eae98afe 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -6,8 +6,11 @@
6#include <glad/glad.h> 6#include <glad/glad.h>
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "common/microprofile.h"
9#include "video_core/renderer_opengl/gl_state.h" 10#include "video_core/renderer_opengl/gl_state.h"
10 11
12MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
13
11namespace OpenGL { 14namespace OpenGL {
12 15
13using Maxwell = Tegra::Engines::Maxwell3D::Regs; 16using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -524,6 +527,7 @@ void OpenGLState::ApplySamplers() const {
524} 527}
525 528
526void OpenGLState::Apply() const { 529void OpenGLState::Apply() const {
530 MICROPROFILE_SCOPE(OpenGL_State);
527 ApplyFramebufferState(); 531 ApplyFramebufferState();
528 ApplyVertexArrayState(); 532 ApplyVertexArrayState();
529 ApplyShaderProgram(); 533 ApplyShaderProgram();
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 08ae1a429..b1f6bc7c2 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType;
31 31
32MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); 32MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
33MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); 33MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
34MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
35 MP_RGB(128, 192, 128));
34 36
35namespace { 37namespace {
36 38
@@ -535,6 +537,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
535} 537}
536 538
537void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { 539void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
540 MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
538 const auto& src_params = src_surface->GetSurfaceParams(); 541 const auto& src_params = src_surface->GetSurfaceParams();
539 const auto& dst_params = dst_surface->GetSurfaceParams(); 542 const auto& dst_params = dst_surface->GetSurfaceParams();
540 UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); 543 UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 7a0fdb19b..6af9044ca 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs)
75 75
76 // Linear Surface check 76 // Linear Surface check
77 if (!params.is_tiled) { 77 if (!params.is_tiled) {
78 if (std::tie(params.width, params.height, params.pitch) == 78 if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
79 std::tie(rhs.width, rhs.height, rhs.pitch)) { 79 if (params.width == rhs.width) {
80 return MatchStructureResult::FullMatch; 80 return MatchStructureResult::FullMatch;
81 } else {
82 return MatchStructureResult::SemiMatch;
83 }
81 } 84 }
82 return MatchStructureResult::None; 85 return MatchStructureResult::None;
83 } 86 }
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 8ba386a8a..bcce8d863 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -200,8 +200,9 @@ public:
200 modification_tick = tick; 200 modification_tick = tick;
201 } 201 }
202 202
203 void MarkAsRenderTarget(const bool is_target) { 203 void MarkAsRenderTarget(const bool is_target, const u32 index) {
204 this->is_target = is_target; 204 this->is_target = is_target;
205 this->index = index;
205 } 206 }
206 207
207 void MarkAsPicked(const bool is_picked) { 208 void MarkAsPicked(const bool is_picked) {
@@ -221,6 +222,10 @@ public:
221 return is_target; 222 return is_target;
222 } 223 }
223 224
225 u32 GetRenderTarget() const {
226 return index;
227 }
228
224 bool IsRegistered() const { 229 bool IsRegistered() const {
225 return is_registered; 230 return is_registered;
226 } 231 }
@@ -307,10 +312,13 @@ private:
307 return view; 312 return view;
308 } 313 }
309 314
315 static constexpr u32 NO_RT = 0xFFFFFFFF;
316
310 bool is_modified{}; 317 bool is_modified{};
311 bool is_target{}; 318 bool is_target{};
312 bool is_registered{}; 319 bool is_registered{};
313 bool is_picked{}; 320 bool is_picked{};
321 u32 index{NO_RT};
314 u64 modification_tick{}; 322 u64 modification_tick{};
315}; 323};
316 324
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 9c56e2b4f..fd5472451 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co
290 290
291std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, 291std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
292 bool uncompressed) const { 292 bool uncompressed) const {
293 const bool tiled{as_host_size ? false : is_tiled};
294 const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; 293 const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
295 const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; 294 const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
296 const u32 depth{is_layered ? 1U : GetMipDepth(level)}; 295 const u32 depth{is_layered ? 1U : GetMipDepth(level)};
297 return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, 296 if (is_tiled) {
298 GetMipBlockHeight(level), GetMipBlockDepth(level)); 297 return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
298 depth, GetMipBlockHeight(level),
299 GetMipBlockDepth(level));
300 } else if (as_host_size || IsBuffer()) {
301 return GetBytesPerPixel() * width * height * depth;
302 } else {
303 // Linear Texture Case
304 return pitch * height * depth;
305 }
299} 306}
300 307
301bool SurfaceParams::operator==(const SurfaceParams& rhs) const { 308bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c9e72531a..7f9623c62 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -133,11 +133,11 @@ public:
133 regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; 133 regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
134 auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); 134 auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
135 if (depth_buffer.target) 135 if (depth_buffer.target)
136 depth_buffer.target->MarkAsRenderTarget(false); 136 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
137 depth_buffer.target = surface_view.first; 137 depth_buffer.target = surface_view.first;
138 depth_buffer.view = surface_view.second; 138 depth_buffer.view = surface_view.second;
139 if (depth_buffer.target) 139 if (depth_buffer.target)
140 depth_buffer.target->MarkAsRenderTarget(true); 140 depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
141 return surface_view.second; 141 return surface_view.second;
142 } 142 }
143 143
@@ -167,11 +167,11 @@ public:
167 auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), 167 auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
168 preserve_contents, true); 168 preserve_contents, true);
169 if (render_targets[index].target) 169 if (render_targets[index].target)
170 render_targets[index].target->MarkAsRenderTarget(false); 170 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
171 render_targets[index].target = surface_view.first; 171 render_targets[index].target = surface_view.first;
172 render_targets[index].view = surface_view.second; 172 render_targets[index].view = surface_view.second;
173 if (render_targets[index].target) 173 if (render_targets[index].target)
174 render_targets[index].target->MarkAsRenderTarget(true); 174 render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
175 return surface_view.second; 175 return surface_view.second;
176 } 176 }
177 177
@@ -191,7 +191,7 @@ public:
191 if (depth_buffer.target == nullptr) { 191 if (depth_buffer.target == nullptr) {
192 return; 192 return;
193 } 193 }
194 depth_buffer.target->MarkAsRenderTarget(false); 194 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
195 depth_buffer.target = nullptr; 195 depth_buffer.target = nullptr;
196 depth_buffer.view = nullptr; 196 depth_buffer.view = nullptr;
197 } 197 }
@@ -200,7 +200,7 @@ public:
200 if (render_targets[index].target == nullptr) { 200 if (render_targets[index].target == nullptr) {
201 return; 201 return;
202 } 202 }
203 render_targets[index].target->MarkAsRenderTarget(false); 203 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
204 render_targets[index].target = nullptr; 204 render_targets[index].target = nullptr;
205 render_targets[index].view = nullptr; 205 render_targets[index].view = nullptr;
206 } 206 }
@@ -270,6 +270,16 @@ protected:
270 // and reading it from a sepparate buffer. 270 // and reading it from a sepparate buffer.
271 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; 271 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
272 272
273 void ManageRenderTargetUnregister(TSurface& surface) {
274 auto& maxwell3d = system.GPU().Maxwell3D();
275 const u32 index = surface->GetRenderTarget();
276 if (index == DEPTH_RT) {
277 maxwell3d.dirty_flags.zeta_buffer = true;
278 } else {
279 maxwell3d.dirty_flags.color_buffer.set(index, true);
280 }
281 }
282
273 void Register(TSurface surface) { 283 void Register(TSurface surface) {
274 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 284 const GPUVAddr gpu_addr = surface->GetGpuAddr();
275 const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); 285 const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
@@ -294,6 +304,9 @@ protected:
294 if (guard_render_targets && surface->IsProtected()) { 304 if (guard_render_targets && surface->IsProtected()) {
295 return; 305 return;
296 } 306 }
307 if (!guard_render_targets && surface->IsRenderTarget()) {
308 ManageRenderTargetUnregister(surface);
309 }
297 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 310 const GPUVAddr gpu_addr = surface->GetGpuAddr();
298 const CacheAddr cache_ptr = surface->GetCacheAddr(); 311 const CacheAddr cache_ptr = surface->GetCacheAddr();
299 const std::size_t size = surface->GetSizeInBytes(); 312 const std::size_t size = surface->GetSizeInBytes();
@@ -649,15 +662,6 @@ private:
649 } 662 }
650 return {current_surface, *view}; 663 return {current_surface, *view};
651 } 664 }
652 // The next case is unsafe, so if we r in accurate GPU, just skip it
653 if (Settings::values.use_accurate_gpu_emulation) {
654 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
655 MatchTopologyResult::FullMatch);
656 }
657 // This is the case the texture is a part of the parent.
658 if (current_surface->MatchesSubTexture(params, gpu_addr)) {
659 return RebuildSurface(current_surface, params, is_render);
660 }
661 } else { 665 } else {
662 // If there are many overlaps, odds are they are subtextures of the candidate 666 // If there are many overlaps, odds are they are subtextures of the candidate
663 // surface. We try to construct a new surface based on the candidate parameters, 667 // surface. We try to construct a new surface based on the candidate parameters,
@@ -793,6 +797,9 @@ private:
793 static constexpr u64 registry_page_size{1 << registry_page_bits}; 797 static constexpr u64 registry_page_size{1 << registry_page_bits};
794 std::unordered_map<CacheAddr, std::vector<TSurface>> registry; 798 std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
795 799
800 static constexpr u32 DEPTH_RT = 8;
801 static constexpr u32 NO_RT = 0xFFFFFFFF;
802
796 // The L1 Cache is used for fast texture lookup before checking the overlaps 803 // The L1 Cache is used for fast texture lookup before checking the overlaps
797 // This avoids calculating size and other stuffs. 804 // This avoids calculating size and other stuffs.
798 std::unordered_map<CacheAddr, TSurface> l1_cache; 805 std::unordered_map<CacheAddr, TSurface> l1_cache;