diff options
| author | 2019-07-14 22:44:36 -0400 | |
|---|---|---|
| committer | 2019-07-14 22:44:36 -0400 | |
| commit | b77a1ed67a56bea82a20d6c8e581073a709a2c90 (patch) | |
| tree | b0a6a257a099f2ff18ee48c898c59671cc566de0 /src | |
| parent | Merge pull request #2675 from ReinUsesLisp/opengl-buffer-cache (diff) | |
| parent | Texture_Cache: Address Feedback (diff) | |
| download | yuzu-b77a1ed67a56bea82a20d6c8e581073a709a2c90.tar.gz yuzu-b77a1ed67a56bea82a20d6c8e581073a709a2c90.tar.xz yuzu-b77a1ed67a56bea82a20d6c8e581073a709a2c90.zip | |
Merge pull request #2705 from FernandoS27/tex-cache-fixes
GPU: Fixes to Texture Cache and Include Microprofiles for GL State/BufferCopy/Macro Interpreter
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/macro_interpreter.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_base.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_base.h | 10 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_params.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 37 |
7 files changed, 58 insertions, 22 deletions
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index c766ed692..9f59a2dc1 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp | |||
| @@ -4,14 +4,18 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/microprofile.h" | ||
| 7 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/macro_interpreter.h" | 9 | #include "video_core/macro_interpreter.h" |
| 9 | 10 | ||
| 11 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | ||
| 12 | |||
| 10 | namespace Tegra { | 13 | namespace Tegra { |
| 11 | 14 | ||
| 12 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | 15 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} |
| 13 | 16 | ||
| 14 | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { | 17 | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { |
| 18 | MICROPROFILE_SCOPE(MacroInterp); | ||
| 15 | Reset(); | 19 | Reset(); |
| 16 | registers[1] = parameters[0]; | 20 | registers[1] = parameters[0]; |
| 17 | this->parameters = std::move(parameters); | 21 | this->parameters = std::move(parameters); |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index d86e137ac..0eae98afe 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -6,8 +6,11 @@ | |||
| 6 | #include <glad/glad.h> | 6 | #include <glad/glad.h> |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "common/microprofile.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_state.h" | 10 | #include "video_core/renderer_opengl/gl_state.h" |
| 10 | 11 | ||
| 12 | MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128)); | ||
| 13 | |||
| 11 | namespace OpenGL { | 14 | namespace OpenGL { |
| 12 | 15 | ||
| 13 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 16 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| @@ -524,6 +527,7 @@ void OpenGLState::ApplySamplers() const { | |||
| 524 | } | 527 | } |
| 525 | 528 | ||
| 526 | void OpenGLState::Apply() const { | 529 | void OpenGLState::Apply() const { |
| 530 | MICROPROFILE_SCOPE(OpenGL_State); | ||
| 527 | ApplyFramebufferState(); | 531 | ApplyFramebufferState(); |
| 528 | ApplyVertexArrayState(); | 532 | ApplyVertexArrayState(); |
| 529 | ApplyShaderProgram(); | 533 | ApplyShaderProgram(); |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 08ae1a429..b1f6bc7c2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType; | |||
| 31 | 31 | ||
| 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); | 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); |
| 33 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); | 33 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); |
| 34 | MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", | ||
| 35 | MP_RGB(128, 192, 128)); | ||
| 34 | 36 | ||
| 35 | namespace { | 37 | namespace { |
| 36 | 38 | ||
| @@ -535,6 +537,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 535 | } | 537 | } |
| 536 | 538 | ||
| 537 | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { | 539 | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { |
| 540 | MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); | ||
| 538 | const auto& src_params = src_surface->GetSurfaceParams(); | 541 | const auto& src_params = src_surface->GetSurfaceParams(); |
| 539 | const auto& dst_params = dst_surface->GetSurfaceParams(); | 542 | const auto& dst_params = dst_surface->GetSurfaceParams(); |
| 540 | UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); | 543 | UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); |
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 7a0fdb19b..6af9044ca 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) | |||
| 75 | 75 | ||
| 76 | // Linear Surface check | 76 | // Linear Surface check |
| 77 | if (!params.is_tiled) { | 77 | if (!params.is_tiled) { |
| 78 | if (std::tie(params.width, params.height, params.pitch) == | 78 | if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { |
| 79 | std::tie(rhs.width, rhs.height, rhs.pitch)) { | 79 | if (params.width == rhs.width) { |
| 80 | return MatchStructureResult::FullMatch; | 80 | return MatchStructureResult::FullMatch; |
| 81 | } else { | ||
| 82 | return MatchStructureResult::SemiMatch; | ||
| 83 | } | ||
| 81 | } | 84 | } |
| 82 | return MatchStructureResult::None; | 85 | return MatchStructureResult::None; |
| 83 | } | 86 | } |
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 8ba386a8a..bcce8d863 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -200,8 +200,9 @@ public: | |||
| 200 | modification_tick = tick; | 200 | modification_tick = tick; |
| 201 | } | 201 | } |
| 202 | 202 | ||
| 203 | void MarkAsRenderTarget(const bool is_target) { | 203 | void MarkAsRenderTarget(const bool is_target, const u32 index) { |
| 204 | this->is_target = is_target; | 204 | this->is_target = is_target; |
| 205 | this->index = index; | ||
| 205 | } | 206 | } |
| 206 | 207 | ||
| 207 | void MarkAsPicked(const bool is_picked) { | 208 | void MarkAsPicked(const bool is_picked) { |
| @@ -221,6 +222,10 @@ public: | |||
| 221 | return is_target; | 222 | return is_target; |
| 222 | } | 223 | } |
| 223 | 224 | ||
| 225 | u32 GetRenderTarget() const { | ||
| 226 | return index; | ||
| 227 | } | ||
| 228 | |||
| 224 | bool IsRegistered() const { | 229 | bool IsRegistered() const { |
| 225 | return is_registered; | 230 | return is_registered; |
| 226 | } | 231 | } |
| @@ -307,10 +312,13 @@ private: | |||
| 307 | return view; | 312 | return view; |
| 308 | } | 313 | } |
| 309 | 314 | ||
| 315 | static constexpr u32 NO_RT = 0xFFFFFFFF; | ||
| 316 | |||
| 310 | bool is_modified{}; | 317 | bool is_modified{}; |
| 311 | bool is_target{}; | 318 | bool is_target{}; |
| 312 | bool is_registered{}; | 319 | bool is_registered{}; |
| 313 | bool is_picked{}; | 320 | bool is_picked{}; |
| 321 | u32 index{NO_RT}; | ||
| 314 | u64 modification_tick{}; | 322 | u64 modification_tick{}; |
| 315 | }; | 323 | }; |
| 316 | 324 | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 9c56e2b4f..fd5472451 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co | |||
| 290 | 290 | ||
| 291 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, | 291 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, |
| 292 | bool uncompressed) const { | 292 | bool uncompressed) const { |
| 293 | const bool tiled{as_host_size ? false : is_tiled}; | ||
| 294 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; | 293 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; |
| 295 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; | 294 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; |
| 296 | const u32 depth{is_layered ? 1U : GetMipDepth(level)}; | 295 | const u32 depth{is_layered ? 1U : GetMipDepth(level)}; |
| 297 | return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, | 296 | if (is_tiled) { |
| 298 | GetMipBlockHeight(level), GetMipBlockDepth(level)); | 297 | return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, |
| 298 | depth, GetMipBlockHeight(level), | ||
| 299 | GetMipBlockDepth(level)); | ||
| 300 | } else if (as_host_size || IsBuffer()) { | ||
| 301 | return GetBytesPerPixel() * width * height * depth; | ||
| 302 | } else { | ||
| 303 | // Linear Texture Case | ||
| 304 | return pitch * height * depth; | ||
| 305 | } | ||
| 299 | } | 306 | } |
| 300 | 307 | ||
| 301 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { | 308 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c9e72531a..7f9623c62 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -133,11 +133,11 @@ public: | |||
| 133 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | 133 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; |
| 134 | auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); | 134 | auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); |
| 135 | if (depth_buffer.target) | 135 | if (depth_buffer.target) |
| 136 | depth_buffer.target->MarkAsRenderTarget(false); | 136 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |
| 137 | depth_buffer.target = surface_view.first; | 137 | depth_buffer.target = surface_view.first; |
| 138 | depth_buffer.view = surface_view.second; | 138 | depth_buffer.view = surface_view.second; |
| 139 | if (depth_buffer.target) | 139 | if (depth_buffer.target) |
| 140 | depth_buffer.target->MarkAsRenderTarget(true); | 140 | depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); |
| 141 | return surface_view.second; | 141 | return surface_view.second; |
| 142 | } | 142 | } |
| 143 | 143 | ||
| @@ -167,11 +167,11 @@ public: | |||
| 167 | auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), | 167 | auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), |
| 168 | preserve_contents, true); | 168 | preserve_contents, true); |
| 169 | if (render_targets[index].target) | 169 | if (render_targets[index].target) |
| 170 | render_targets[index].target->MarkAsRenderTarget(false); | 170 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); |
| 171 | render_targets[index].target = surface_view.first; | 171 | render_targets[index].target = surface_view.first; |
| 172 | render_targets[index].view = surface_view.second; | 172 | render_targets[index].view = surface_view.second; |
| 173 | if (render_targets[index].target) | 173 | if (render_targets[index].target) |
| 174 | render_targets[index].target->MarkAsRenderTarget(true); | 174 | render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); |
| 175 | return surface_view.second; | 175 | return surface_view.second; |
| 176 | } | 176 | } |
| 177 | 177 | ||
| @@ -191,7 +191,7 @@ public: | |||
| 191 | if (depth_buffer.target == nullptr) { | 191 | if (depth_buffer.target == nullptr) { |
| 192 | return; | 192 | return; |
| 193 | } | 193 | } |
| 194 | depth_buffer.target->MarkAsRenderTarget(false); | 194 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |
| 195 | depth_buffer.target = nullptr; | 195 | depth_buffer.target = nullptr; |
| 196 | depth_buffer.view = nullptr; | 196 | depth_buffer.view = nullptr; |
| 197 | } | 197 | } |
| @@ -200,7 +200,7 @@ public: | |||
| 200 | if (render_targets[index].target == nullptr) { | 200 | if (render_targets[index].target == nullptr) { |
| 201 | return; | 201 | return; |
| 202 | } | 202 | } |
| 203 | render_targets[index].target->MarkAsRenderTarget(false); | 203 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); |
| 204 | render_targets[index].target = nullptr; | 204 | render_targets[index].target = nullptr; |
| 205 | render_targets[index].view = nullptr; | 205 | render_targets[index].view = nullptr; |
| 206 | } | 206 | } |
| @@ -270,6 +270,16 @@ protected: | |||
| 270 | // and reading it from a separate buffer. | 270 | // and reading it from a separate buffer. |
| 271 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; | 271 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; |
| 272 | 272 | ||
| 273 | void ManageRenderTargetUnregister(TSurface& surface) { | ||
| 274 | auto& maxwell3d = system.GPU().Maxwell3D(); | ||
| 275 | const u32 index = surface->GetRenderTarget(); | ||
| 276 | if (index == DEPTH_RT) { | ||
| 277 | maxwell3d.dirty_flags.zeta_buffer = true; | ||
| 278 | } else { | ||
| 279 | maxwell3d.dirty_flags.color_buffer.set(index, true); | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
| 273 | void Register(TSurface surface) { | 283 | void Register(TSurface surface) { |
| 274 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 284 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); |
| 275 | const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); | 285 | const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); |
| @@ -294,6 +304,9 @@ protected: | |||
| 294 | if (guard_render_targets && surface->IsProtected()) { | 304 | if (guard_render_targets && surface->IsProtected()) { |
| 295 | return; | 305 | return; |
| 296 | } | 306 | } |
| 307 | if (!guard_render_targets && surface->IsRenderTarget()) { | ||
| 308 | ManageRenderTargetUnregister(surface); | ||
| 309 | } | ||
| 297 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 310 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); |
| 298 | const CacheAddr cache_ptr = surface->GetCacheAddr(); | 311 | const CacheAddr cache_ptr = surface->GetCacheAddr(); |
| 299 | const std::size_t size = surface->GetSizeInBytes(); | 312 | const std::size_t size = surface->GetSizeInBytes(); |
| @@ -649,15 +662,6 @@ private: | |||
| 649 | } | 662 | } |
| 650 | return {current_surface, *view}; | 663 | return {current_surface, *view}; |
| 651 | } | 664 | } |
| 652 | // The next case is unsafe, so if we are in accurate GPU, just skip it | ||
| 653 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 654 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 655 | MatchTopologyResult::FullMatch); | ||
| 656 | } | ||
| 657 | // This is the case the texture is a part of the parent. | ||
| 658 | if (current_surface->MatchesSubTexture(params, gpu_addr)) { | ||
| 659 | return RebuildSurface(current_surface, params, is_render); | ||
| 660 | } | ||
| 661 | } else { | 665 | } else { |
| 662 | // If there are many overlaps, odds are they are subtextures of the candidate | 666 | // If there are many overlaps, odds are they are subtextures of the candidate |
| 663 | // surface. We try to construct a new surface based on the candidate parameters, | 667 | // surface. We try to construct a new surface based on the candidate parameters, |
| @@ -793,6 +797,9 @@ private: | |||
| 793 | static constexpr u64 registry_page_size{1 << registry_page_bits}; | 797 | static constexpr u64 registry_page_size{1 << registry_page_bits}; |
| 794 | std::unordered_map<CacheAddr, std::vector<TSurface>> registry; | 798 | std::unordered_map<CacheAddr, std::vector<TSurface>> registry; |
| 795 | 799 | ||
| 800 | static constexpr u32 DEPTH_RT = 8; | ||
| 801 | static constexpr u32 NO_RT = 0xFFFFFFFF; | ||
| 802 | |||
| 796 | // The L1 Cache is used for fast texture lookup before checking the overlaps | 803 | // The L1 Cache is used for fast texture lookup before checking the overlaps |
| 797 | // This avoids calculating size and other stuff. | 804 | // This avoids calculating size and other stuff. |
| 798 | std::unordered_map<CacheAddr, TSurface> l1_cache; | 805 | std::unordered_map<CacheAddr, TSurface> l1_cache; |