summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/core/settings.h 1
-rw-r--r-- src/core/telemetry_session.cpp 2
-rw-r--r-- src/video_core/engines/maxwell_3d.h 36
-rw-r--r-- src/video_core/rasterizer_interface.h 5
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 107
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 1374
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 331
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 1
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.h 2
-rw-r--r-- src/yuzu/configuration/config.cpp 3
-rw-r--r-- src/yuzu/configuration/configure_graphics.cpp 2
-rw-r--r-- src/yuzu/configuration/configure_graphics.ui 7
-rw-r--r-- src/yuzu_cmd/config.cpp 2
-rw-r--r-- src/yuzu_cmd/default_ini.h 4
15 files changed, 425 insertions, 1454 deletions
diff --git a/src/core/settings.h b/src/core/settings.h
index a7f1e5fa0..7150d9755 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -129,6 +129,7 @@ struct Values {
129 // Renderer 129 // Renderer
130 float resolution_factor; 130 float resolution_factor;
131 bool toggle_framelimit; 131 bool toggle_framelimit;
132 bool use_accurate_framebuffers;
132 133
133 float bg_red; 134 float bg_red;
134 float bg_green; 135 float bg_green;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index a60aa1143..270d68222 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -161,6 +161,8 @@ TelemetrySession::TelemetrySession() {
161 Settings::values.resolution_factor); 161 Settings::values.resolution_factor);
162 AddField(Telemetry::FieldType::UserConfig, "Renderer_ToggleFramelimit", 162 AddField(Telemetry::FieldType::UserConfig, "Renderer_ToggleFramelimit",
163 Settings::values.toggle_framelimit); 163 Settings::values.toggle_framelimit);
164 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateFramebuffers",
165 Settings::values.use_accurate_framebuffers);
164 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", 166 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
165 Settings::values.use_docked_mode); 167 Settings::values.use_docked_mode);
166} 168}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 2dc251205..180be4ff4 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -321,6 +321,24 @@ public:
321 INSERT_PADDING_WORDS(1); 321 INSERT_PADDING_WORDS(1);
322 }; 322 };
323 323
324 struct RenderTargetConfig {
325 u32 address_high;
326 u32 address_low;
327 u32 width;
328 u32 height;
329 Tegra::RenderTargetFormat format;
330 u32 block_dimensions;
331 u32 array_mode;
332 u32 layer_stride;
333 u32 base_layer;
334 INSERT_PADDING_WORDS(7);
335
336 GPUVAddr Address() const {
337 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
338 address_low);
339 }
340 };
341
324 union { 342 union {
325 struct { 343 struct {
326 INSERT_PADDING_WORDS(0x45); 344 INSERT_PADDING_WORDS(0x45);
@@ -333,23 +351,7 @@ public:
333 351
334 INSERT_PADDING_WORDS(0x1B8); 352 INSERT_PADDING_WORDS(0x1B8);
335 353
336 struct { 354 RenderTargetConfig rt[NumRenderTargets];
337 u32 address_high;
338 u32 address_low;
339 u32 width;
340 u32 height;
341 Tegra::RenderTargetFormat format;
342 u32 block_dimensions;
343 u32 array_mode;
344 u32 layer_stride;
345 u32 base_layer;
346 INSERT_PADDING_WORDS(7);
347
348 GPUVAddr Address() const {
349 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
350 address_low);
351 }
352 } rt[NumRenderTargets];
353 355
354 struct { 356 struct {
355 f32 scale_x; 357 f32 scale_x;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index f0e48a802..145e58334 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -51,9 +51,8 @@ public:
51 } 51 }
52 52
53 /// Attempt to use a faster method to display the framebuffer to screen 53 /// Attempt to use a faster method to display the framebuffer to screen
54 virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, 54 virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
55 VAddr framebuffer_addr, u32 pixel_stride, 55 u32 pixel_stride, ScreenInfo& screen_info) {
56 ScreenInfo& screen_info) {
57 return false; 56 return false;
58 } 57 }
59 58
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 3fbf8e1f9..62ee45a36 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -146,7 +146,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
146 u64 size = end - start + 1; 146 u64 size = end - start + 1;
147 147
148 // Copy vertex array data 148 // Copy vertex array data
149 res_cache.FlushRegion(start, size, nullptr);
150 Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); 149 Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size);
151 150
152 // Bind the vertex array to the buffer at the current offset. 151 // Bind the vertex array to the buffer at the current offset.
@@ -325,29 +324,22 @@ void RasterizerOpenGL::DrawArrays() {
325 std::tie(color_surface, depth_surface, surfaces_rect) = 324 std::tie(color_surface, depth_surface, surfaces_rect) =
326 res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); 325 res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect);
327 326
328 const u16 res_scale = color_surface != nullptr
329 ? color_surface->res_scale
330 : (depth_surface == nullptr ? 1u : depth_surface->res_scale);
331
332 MathUtil::Rectangle<u32> draw_rect{ 327 MathUtil::Rectangle<u32> draw_rect{
328 static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
329 surfaces_rect.left, surfaces_rect.right)), // Left
330 static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
331 surfaces_rect.bottom, surfaces_rect.top)), // Top
332 static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right,
333 surfaces_rect.left, surfaces_rect.right)), // Right
333 static_cast<u32>( 334 static_cast<u32>(
334 std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale, 335 std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom,
335 surfaces_rect.left, surfaces_rect.right)), // Left 336 surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
336 static_cast<u32>(
337 std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale,
338 surfaces_rect.bottom, surfaces_rect.top)), // Top
339 static_cast<u32>(
340 std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale,
341 surfaces_rect.left, surfaces_rect.right)), // Right
342 static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
343 viewport_rect.bottom * res_scale,
344 surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
345 337
346 // Bind the framebuffer surfaces 338 // Bind the framebuffer surfaces
347 BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); 339 BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
348 340
349 // Sync the viewport 341 // Sync the viewport
350 SyncViewport(surfaces_rect, res_scale); 342 SyncViewport(surfaces_rect);
351 343
352 // Sync the blend state registers 344 // Sync the blend state registers
353 SyncBlendState(); 345 SyncBlendState();
@@ -442,19 +434,11 @@ void RasterizerOpenGL::DrawArrays() {
442 state.Apply(); 434 state.Apply();
443 435
444 // Mark framebuffer surfaces as dirty 436 // Mark framebuffer surfaces as dirty
445 MathUtil::Rectangle<u32> draw_rect_unscaled{
446 draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale,
447 draw_rect.bottom / res_scale};
448
449 if (color_surface != nullptr && write_color_fb) { 437 if (color_surface != nullptr && write_color_fb) {
450 auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); 438 res_cache.MarkSurfaceAsDirty(color_surface);
451 res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
452 color_surface);
453 } 439 }
454 if (depth_surface != nullptr && write_depth_fb) { 440 if (depth_surface != nullptr && write_depth_fb) {
455 auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); 441 res_cache.MarkSurfaceAsDirty(depth_surface);
456 res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
457 depth_surface);
458 } 442 }
459} 443}
460 444
@@ -462,7 +446,7 @@ void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
462 446
463void RasterizerOpenGL::FlushAll() { 447void RasterizerOpenGL::FlushAll() {
464 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 448 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
465 res_cache.FlushAll(); 449 res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
466} 450}
467 451
468void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { 452void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
@@ -472,13 +456,13 @@ void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
472 456
473void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { 457void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
474 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 458 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
475 res_cache.InvalidateRegion(addr, size, nullptr); 459 res_cache.InvalidateRegion(addr, size);
476} 460}
477 461
478void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { 462void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
479 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 463 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
480 res_cache.FlushRegion(addr, size); 464 res_cache.FlushRegion(addr, size);
481 res_cache.InvalidateRegion(addr, size, nullptr); 465 res_cache.InvalidateRegion(addr, size);
482} 466}
483 467
484bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { 468bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
@@ -497,45 +481,28 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) {
497 return true; 481 return true;
498} 482}
499 483
500bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, 484bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
501 VAddr framebuffer_addr, u32 pixel_stride, 485 VAddr framebuffer_addr, u32 pixel_stride,
502 ScreenInfo& screen_info) { 486 ScreenInfo& screen_info) {
503 if (framebuffer_addr == 0) { 487 if (!framebuffer_addr) {
504 return false; 488 return {};
505 } 489 }
490
506 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 491 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
507 492
508 SurfaceParams src_params; 493 const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
509 src_params.cpu_addr = framebuffer_addr; 494 if (!surface) {
510 src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0); 495 return {};
511 src_params.width = std::min(framebuffer.width, pixel_stride);
512 src_params.height = framebuffer.height;
513 src_params.stride = pixel_stride;
514 src_params.is_tiled = true;
515 src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
516 src_params.pixel_format =
517 SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
518 src_params.component_type =
519 SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format);
520 src_params.UpdateParams();
521
522 MathUtil::Rectangle<u32> src_rect;
523 Surface src_surface;
524 std::tie(src_surface, src_rect) =
525 res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
526
527 if (src_surface == nullptr) {
528 return false;
529 } 496 }
530 497
531 u32 scaled_width = src_surface->GetScaledWidth(); 498 // Verify that the cached surface is the same size and format as the requested framebuffer
532 u32 scaled_height = src_surface->GetScaledHeight(); 499 const auto& params{surface->GetSurfaceParams()};
533 500 const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)};
534 screen_info.display_texcoords = MathUtil::Rectangle<float>( 501 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
535 (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, 502 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
536 (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); 503 ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
537 504
538 screen_info.display_texture = src_surface->texture.handle; 505 screen_info.display_texture = surface->Texture().handle;
539 506
540 return true; 507 return true;
541} 508}
@@ -674,7 +641,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
674 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); 641 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
675 Surface surface = res_cache.GetTextureSurface(texture); 642 Surface surface = res_cache.GetTextureSurface(texture);
676 if (surface != nullptr) { 643 if (surface != nullptr) {
677 state.texture_units[current_bindpoint].texture_2d = surface->texture.handle; 644 state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle;
678 state.texture_units[current_bindpoint].swizzle.r = 645 state.texture_units[current_bindpoint].swizzle.r =
679 MaxwellToGL::SwizzleSource(texture.tic.x_source); 646 MaxwellToGL::SwizzleSource(texture.tic.x_source);
680 state.texture_units[current_bindpoint].swizzle.g = 647 state.texture_units[current_bindpoint].swizzle.g =
@@ -700,16 +667,16 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
700 state.Apply(); 667 state.Apply();
701 668
702 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 669 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
703 color_surface != nullptr ? color_surface->texture.handle : 0, 0); 670 color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
704 if (depth_surface != nullptr) { 671 if (depth_surface != nullptr) {
705 if (has_stencil) { 672 if (has_stencil) {
706 // attach both depth and stencil 673 // attach both depth and stencil
707 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 674 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
708 depth_surface->texture.handle, 0); 675 depth_surface->Texture().handle, 0);
709 } else { 676 } else {
710 // attach depth 677 // attach depth
711 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, 678 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
712 depth_surface->texture.handle, 0); 679 depth_surface->Texture().handle, 0);
713 // clear stencil attachment 680 // clear stencil attachment
714 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); 681 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
715 } 682 }
@@ -720,14 +687,14 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
720 } 687 }
721} 688}
722 689
723void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) { 690void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) {
724 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; 691 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
725 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; 692 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
726 693
727 state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale; 694 state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left;
728 state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; 695 state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom;
729 state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale); 696 state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
730 state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale); 697 state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
731} 698}
732 699
733void RasterizerOpenGL::SyncClipEnabled() { 700void RasterizerOpenGL::SyncClipEnabled() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4762983c9..621200f03 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -109,7 +109,7 @@ private:
109 u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries); 109 u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries);
110 110
111 /// Syncs the viewport to match the guest state 111 /// Syncs the viewport to match the guest state
112 void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); 112 void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect);
113 113
114 /// Syncs the clip enabled status to match the guest state 114 /// Syncs the clip enabled status to match the guest state
115 void SyncClipEnabled(); 115 void SyncClipEnabled();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 857164ff6..63f5999ea 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -1,37 +1,23 @@
1// Copyright 2015 Citra Emulator Project 1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <atomic>
7#include <cstring>
8#include <iterator>
9#include <memory>
10#include <utility>
11#include <vector>
12#include <boost/optional.hpp>
13#include <boost/range/iterator_range.hpp>
14#include <glad/glad.h> 6#include <glad/glad.h>
7
15#include "common/alignment.h" 8#include "common/alignment.h"
16#include "common/bit_field.h" 9#include "common/assert.h"
17#include "common/color.h"
18#include "common/logging/log.h"
19#include "common/math_util.h"
20#include "common/microprofile.h" 10#include "common/microprofile.h"
21#include "common/scope_exit.h" 11#include "common/scope_exit.h"
22#include "core/core.h" 12#include "core/core.h"
23#include "core/frontend/emu_window.h"
24#include "core/hle/kernel/process.h" 13#include "core/hle/kernel/process.h"
25#include "core/hle/kernel/vm_manager.h"
26#include "core/memory.h" 14#include "core/memory.h"
27#include "core/settings.h" 15#include "core/settings.h"
28#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
29#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 17#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
30#include "video_core/renderer_opengl/gl_state.h"
31#include "video_core/textures/astc.h" 18#include "video_core/textures/astc.h"
32#include "video_core/textures/decoders.h" 19#include "video_core/textures/decoders.h"
33#include "video_core/utils.h" 20#include "video_core/utils.h"
34#include "video_core/video_core.h"
35 21
36using SurfaceType = SurfaceParams::SurfaceType; 22using SurfaceType = SurfaceParams::SurfaceType;
37using PixelFormat = SurfaceParams::PixelFormat; 23using PixelFormat = SurfaceParams::PixelFormat;
@@ -44,6 +30,40 @@ struct FormatTuple {
44 bool compressed; 30 bool compressed;
45}; 31};
46 32
33/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
34 const Tegra::Texture::FullTextureInfo& config) {
35
36 SurfaceParams params{};
37 params.addr = config.tic.Address();
38 params.is_tiled = config.tic.IsTiled();
39 params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
40 params.pixel_format = PixelFormatFromTextureFormat(config.tic.format);
41 params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
42 params.type = GetFormatType(params.pixel_format);
43 params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
44 params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
45 params.unaligned_height = config.tic.Height();
46 params.size_in_bytes = params.SizeInBytes();
47 return params;
48}
49
50/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(
51 const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) {
52
53 SurfaceParams params{};
54 params.addr = config.Address();
55 params.is_tiled = true;
56 params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
57 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
58 params.component_type = ComponentTypeFromRenderTarget(config.format);
59 params.type = GetFormatType(params.pixel_format);
60 params.width = config.width;
61 params.height = config.height;
62 params.unaligned_height = config.height;
63 params.size_in_bytes = params.SizeInBytes();
64 return params;
65}
66
47static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ 67static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
48 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8 68 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8
49 {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5 69 {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5
@@ -63,8 +83,8 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
63 const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); 83 const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
64 if (type == SurfaceType::ColorTexture) { 84 if (type == SurfaceType::ColorTexture) {
65 ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); 85 ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
66 // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are 86 // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which
67 // type FLOAT 87 // are type FLOAT
68 ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F || 88 ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F ||
69 pixel_format == PixelFormat::R11FG11FB10F); 89 pixel_format == PixelFormat::R11FG11FB10F);
70 return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; 90 return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
@@ -77,65 +97,70 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
77 return {}; 97 return {};
78} 98}
79 99
80template <typename Map, typename Interval> 100VAddr SurfaceParams::GetCpuAddr() const {
81constexpr auto RangeFromInterval(Map& map, const Interval& interval) { 101 const auto& gpu = Core::System::GetInstance().GPU();
82 return boost::make_iterator_range(map.equal_range(interval)); 102 return *gpu.memory_manager->GpuToCpuAddress(addr);
83} 103}
84 104
85static u16 GetResolutionScaleFactor() { 105static bool IsPixelFormatASTC(PixelFormat format) {
86 return static_cast<u16>(!Settings::values.resolution_factor 106 switch (format) {
87 ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() 107 case PixelFormat::ASTC_2D_4X4:
88 : Settings::values.resolution_factor); 108 return true;
109 default:
110 return false;
111 }
89} 112}
90 113
91static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) { 114static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
92 u32 block_width{};
93 u32 block_height{};
94
95 switch (format) { 115 switch (format) {
96 case PixelFormat::ASTC_2D_4X4: 116 case PixelFormat::ASTC_2D_4X4:
97 block_width = 4; 117 return {4, 4};
98 block_height = 4;
99 break;
100 default: 118 default:
101 NGLOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format)); 119 NGLOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
102 UNREACHABLE(); 120 UNREACHABLE();
103 } 121 }
122}
123
124MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
125 u32 actual_height{unaligned_height};
126 if (IsPixelFormatASTC(pixel_format)) {
127 // ASTC formats must stop at the ASTC block size boundary
128 actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second);
129 }
130 return {0, actual_height, width, 0};
131}
104 132
133static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) {
134 u32 block_width{};
135 u32 block_height{};
136 std::tie(block_width, block_height) = GetASTCBlockSize(format);
105 data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height); 137 data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
106} 138}
107 139
108template <bool morton_to_gl, PixelFormat format> 140template <bool morton_to_gl, PixelFormat format>
109void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, 141void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) {
110 Tegra::GPUVAddr start, Tegra::GPUVAddr end) {
111 constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; 142 constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
112 constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); 143 constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
113 const auto& gpu = Core::System::GetInstance().GPU(); 144 const auto& gpu = Core::System::GetInstance().GPU();
114 145
115 if (morton_to_gl) { 146 if (morton_to_gl) {
116 auto data = Tegra::Texture::UnswizzleTexture( 147 auto data = Tegra::Texture::UnswizzleTexture(
117 *gpu.memory_manager->GpuToCpuAddress(base), 148 *gpu.memory_manager->GpuToCpuAddress(addr),
118 SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); 149 SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
119 150
120 if (SurfaceParams::IsFormatASTC(format)) {
121 // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this
122 ConvertASTCToRGBA8(data, format, stride, height);
123 }
124
125 std::memcpy(gl_buffer, data.data(), data.size()); 151 std::memcpy(gl_buffer, data.data(), data.size());
126 } else { 152 } else {
127 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check 153 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
128 // the configuration for this and perform more generic un/swizzle 154 // check the configuration for this and perform more generic un/swizzle
129 NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); 155 NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
130 VideoCore::MortonCopyPixels128( 156 VideoCore::MortonCopyPixels128(
131 stride, height, bytes_per_pixel, gl_bytes_per_pixel, 157 stride, height, bytes_per_pixel, gl_bytes_per_pixel,
132 Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer, 158 Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer,
133 morton_to_gl); 159 morton_to_gl);
134 } 160 }
135} 161}
136 162
137static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, 163static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
138 Tegra::GPUVAddr),
139 SurfaceParams::MaxPixelFormat> 164 SurfaceParams::MaxPixelFormat>
140 morton_to_gl_fns = { 165 morton_to_gl_fns = {
141 MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, 166 MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
@@ -146,8 +171,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
146 MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, 171 MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
147}; 172};
148 173
149static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, 174static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
150 Tegra::GPUVAddr),
151 SurfaceParams::MaxPixelFormat> 175 SurfaceParams::MaxPixelFormat>
152 gl_to_morton_fns = { 176 gl_to_morton_fns = {
153 MortonCopy<false, PixelFormat::ABGR8>, 177 MortonCopy<false, PixelFormat::ABGR8>,
@@ -192,374 +216,79 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
192 cur_state.Apply(); 216 cur_state.Apply();
193} 217}
194 218
195static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, 219CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
196 const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, 220 texture.Create();
197 GLuint read_fb_handle, GLuint draw_fb_handle) { 221 const auto& rect{params.GetRect()};
198 222 AllocateSurfaceTexture(texture.handle,
199 glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex, 223 GetFormatTuple(params.pixel_format, params.component_type),
200 GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(), 224 rect.GetWidth(), rect.GetHeight());
201 src_rect.GetHeight(), 0);
202 return true;
203}
204
205static bool FillSurface(const Surface& surface, const u8* fill_data,
206 const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) {
207 UNREACHABLE();
208 return {};
209}
210
211SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const {
212 SurfaceParams params = *this;
213 const u32 tiled_size = is_tiled ? 8 : 1;
214 const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size);
215 Tegra::GPUVAddr aligned_start =
216 addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes);
217 Tegra::GPUVAddr aligned_end =
218 addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes);
219
220 if (aligned_end - aligned_start > stride_tiled_bytes) {
221 params.addr = aligned_start;
222 params.height = static_cast<u32>((aligned_end - aligned_start) / BytesInPixels(stride));
223 } else {
224 // 1 row
225 ASSERT(aligned_end - aligned_start == stride_tiled_bytes);
226 const u64 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1);
227 aligned_start =
228 addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment);
229 aligned_end =
230 addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment);
231 params.addr = aligned_start;
232 params.width = static_cast<u32>(PixelsInBytes(aligned_end - aligned_start) / tiled_size);
233 params.stride = params.width;
234 params.height = tiled_size;
235 }
236 params.UpdateParams();
237
238 return params;
239}
240
241SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const {
242 if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) {
243 return {};
244 }
245
246 if (is_tiled) {
247 unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8;
248 unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8;
249 unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8;
250 unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8;
251 }
252
253 const u32 stride_tiled = !is_tiled ? stride : stride * 8;
254
255 const u32 pixel_offset =
256 stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) +
257 unscaled_rect.left;
258
259 const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth();
260
261 return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)};
262}
263
264MathUtil::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const {
265 const u32 begin_pixel_index = static_cast<u32>(PixelsInBytes(sub_surface.addr - addr));
266
267 if (is_tiled) {
268 const int x0 = (begin_pixel_index % (stride * 8)) / 8;
269 const int y0 = (begin_pixel_index / (stride * 8)) * 8;
270 // Top to bottom
271 return MathUtil::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width,
272 height - (y0 + sub_surface.height));
273 }
274
275 const int x0 = begin_pixel_index % stride;
276 const int y0 = begin_pixel_index / stride;
277 // Bottom to top
278 return MathUtil::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0);
279}
280
281MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const {
282 auto rect = GetSubRect(sub_surface);
283 rect.left = rect.left * res_scale;
284 rect.right = rect.right * res_scale;
285 rect.top = rect.top * res_scale;
286 rect.bottom = rect.bottom * res_scale;
287 return rect;
288}
289
290bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
291 return std::tie(other_surface.addr, other_surface.width, other_surface.height,
292 other_surface.stride, other_surface.block_height, other_surface.pixel_format,
293 other_surface.component_type,
294 other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height,
295 pixel_format, component_type, is_tiled) &&
296 pixel_format != PixelFormat::Invalid;
297}
298
299bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
300 return sub_surface.addr >= addr && sub_surface.end <= end &&
301 sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
302 sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height &&
303 sub_surface.component_type == component_type &&
304 (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
305 (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
306 GetSubRect(sub_surface).left + sub_surface.width <= stride;
307}
308
309bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
310 return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
311 addr <= expanded_surface.end && expanded_surface.addr <= end &&
312 is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height &&
313 component_type == expanded_surface.component_type && stride == expanded_surface.stride &&
314 (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
315 BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
316 0;
317}
318
319bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
320 if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr ||
321 end < texcopy_params.end) {
322 return false;
323 }
324 if (texcopy_params.block_height != block_height ||
325 texcopy_params.component_type != component_type)
326 return false;
327
328 if (texcopy_params.width != texcopy_params.stride) {
329 const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1)));
330 return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
331 texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
332 (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) &&
333 ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride;
334 }
335 return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval();
336}
337
338VAddr SurfaceParams::GetCpuAddr() const {
339 // When this function is used, only cpu_addr or (GPU) addr should be set, not both
340 ASSERT(!(cpu_addr && addr));
341 const auto& gpu = Core::System::GetInstance().GPU();
342 return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr));
343}
344
345bool CachedSurface::CanFill(const SurfaceParams& dest_surface,
346 SurfaceInterval fill_interval) const {
347 if (type == SurfaceType::Fill && IsRegionValid(fill_interval) &&
348 boost::icl::first(fill_interval) >= addr &&
349 boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range
350 dest_surface.FromInterval(fill_interval).GetInterval() ==
351 fill_interval) { // make sure interval is a rectangle in dest surface
352 if (fill_size * CHAR_BIT != dest_surface.GetFormatBpp()) {
353 // Check if bits repeat for our fill_size
354 const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / CHAR_BIT, 1u);
355 std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel);
356
357 for (u32 i = 0; i < dest_bytes_per_pixel; ++i)
358 std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size);
359
360 for (u32 i = 0; i < fill_size; ++i)
361 if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0],
362 dest_bytes_per_pixel) != 0)
363 return false;
364
365 if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4))
366 return false;
367 }
368 return true;
369 }
370 return false;
371}
372
373bool CachedSurface::CanCopy(const SurfaceParams& dest_surface,
374 SurfaceInterval copy_interval) const {
375 SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
376 ASSERT(subrect_params.GetInterval() == copy_interval);
377 if (CanSubRect(subrect_params))
378 return true;
379
380 if (CanFill(dest_surface, copy_interval))
381 return true;
382
383 return false;
384}
385
386SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const {
387 SurfaceInterval result{};
388 const auto valid_regions =
389 SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions;
390 for (auto& valid_interval : valid_regions) {
391 const SurfaceInterval aligned_interval{
392 addr + Common::AlignUp(boost::icl::first(valid_interval) - addr,
393 BytesInPixels(is_tiled ? 8 * 8 : 1)),
394 addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr,
395 BytesInPixels(is_tiled ? 8 * 8 : 1))};
396
397 if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) ||
398 boost::icl::length(aligned_interval) == 0) {
399 continue;
400 }
401
402 // Get the rectangle within aligned_interval
403 const u32 stride_bytes = static_cast<u32>(BytesInPixels(stride)) * (is_tiled ? 8 : 1);
404 SurfaceInterval rect_interval{
405 addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes),
406 addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes),
407 };
408 if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) {
409 // 1 row
410 rect_interval = aligned_interval;
411 } else if (boost::icl::length(rect_interval) == 0) {
412 // 2 rows that do not make a rectangle, return the larger one
413 const SurfaceInterval row1{boost::icl::first(aligned_interval),
414 boost::icl::first(rect_interval)};
415 const SurfaceInterval row2{boost::icl::first(rect_interval),
416 boost::icl::last_next(aligned_interval)};
417 rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2;
418 }
419
420 if (boost::icl::length(rect_interval) > boost::icl::length(result)) {
421 result = rect_interval;
422 }
423 }
424 return result;
425}
426
427void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
428 SurfaceInterval copy_interval) {
429 SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval);
430 ASSERT(subrect_params.GetInterval() == copy_interval);
431
432 ASSERT(src_surface != dst_surface);
433
434 // This is only called when CanCopy is true, no need to run checks here
435 if (src_surface->type == SurfaceType::Fill) {
436 // FillSurface needs a 4 bytes buffer
437 const u64 fill_offset =
438 (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size;
439 std::array<u8, 4> fill_buffer;
440
441 u64 fill_buff_pos = fill_offset;
442 for (int i : {0, 1, 2, 3})
443 fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size];
444
445 FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params),
446 draw_framebuffer.handle);
447 return;
448 }
449 if (src_surface->CanSubRect(subrect_params)) {
450 BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params),
451 dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params),
452 src_surface->type, read_framebuffer.handle, draw_framebuffer.handle);
453 return;
454 }
455 UNREACHABLE();
456} 225}
457 226
458MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); 227MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
459void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) { 228void CachedSurface::LoadGLBuffer() {
460 ASSERT(type != SurfaceType::Fill); 229 ASSERT(params.type != SurfaceType::Fill);
461 230
462 u8* const texture_src_data = Memory::GetPointer(GetCpuAddr()); 231 u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr());
463 if (texture_src_data == nullptr)
464 return;
465 232
466 if (gl_buffer == nullptr) { 233 ASSERT(texture_src_data);
467 gl_buffer_size = GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format);
468 gl_buffer.reset(new u8[gl_buffer_size]);
469 }
470 234
471 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); 235 gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
472 236
473 ASSERT(load_start >= addr && load_end <= end); 237 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
474 const u64 start_offset = load_start - addr;
475 238
476 if (!is_tiled) { 239 if (!params.is_tiled) {
477 const u32 bytes_per_pixel{GetFormatBpp() >> 3}; 240 const u32 bytes_per_pixel{params.GetFormatBpp() >> 3};
478 241
479 std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, 242 std::memcpy(gl_buffer.data(), texture_src_data,
480 bytes_per_pixel * width * height); 243 bytes_per_pixel * params.width * params.height);
481 } else { 244 } else {
482 morton_to_gl_fns[static_cast<size_t>(pixel_format)](GetActualWidth(), block_height, 245 morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
483 GetActualHeight(), &gl_buffer[0], addr, 246 params.width, params.block_height, params.height, gl_buffer.data(), params.addr);
484 load_start, load_end); 247 }
248
249 if (IsPixelFormatASTC(params.pixel_format)) {
250 // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this
251 ConvertASTCToRGBA8(gl_buffer, params.pixel_format, params.width, params.height);
485 } 252 }
486} 253}
487 254
488MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); 255MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
489void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) { 256void CachedSurface::FlushGLBuffer() {
490 u8* const dst_buffer = Memory::GetPointer(GetCpuAddr()); 257 u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr());
491 if (dst_buffer == nullptr)
492 return;
493
494 ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format));
495 258
496 // TODO: Should probably be done in ::Memory:: and check for other regions too 259 ASSERT(dst_buffer);
497 // same as loadglbuffer() 260 ASSERT(gl_buffer.size() ==
498 if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) 261 params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
499 flush_end = Memory::VRAM_VADDR_END;
500
501 if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR)
502 flush_start = Memory::VRAM_VADDR;
503 262
504 MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); 263 MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
505 264
506 ASSERT(flush_start >= addr && flush_end <= end); 265 if (!params.is_tiled) {
507 const u64 start_offset = flush_start - addr; 266 std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes);
508 const u64 end_offset = flush_end - addr;
509
510 if (type == SurfaceType::Fill) {
511 const u64 coarse_start_offset = start_offset - (start_offset % fill_size);
512 const u64 backup_bytes = start_offset % fill_size;
513 std::array<u8, 4> backup_data;
514 if (backup_bytes)
515 std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes);
516
517 for (u64 offset = coarse_start_offset; offset < end_offset; offset += fill_size) {
518 std::memcpy(&dst_buffer[offset], &fill_data[0],
519 std::min(fill_size, end_offset - offset));
520 }
521
522 if (backup_bytes)
523 std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
524 } else if (!is_tiled) {
525 std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
526 } else { 267 } else {
527 gl_to_morton_fns[static_cast<size_t>(pixel_format)]( 268 gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
528 stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end); 269 params.width, params.block_height, params.height, gl_buffer.data(), params.addr);
529 } 270 }
530} 271}
531 272
532MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); 273MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
533void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, 274void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
534 GLuint draw_fb_handle) { 275 if (params.type == SurfaceType::Fill)
535 if (type == SurfaceType::Fill)
536 return; 276 return;
537 277
538 MICROPROFILE_SCOPE(OpenGL_TextureUL); 278 MICROPROFILE_SCOPE(OpenGL_TextureUL);
539 279
540 ASSERT(gl_buffer_size == 280 ASSERT(gl_buffer.size() ==
541 GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format)); 281 params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
282
283 const auto& rect{params.GetRect()};
542 284
543 // Load data from memory to the surface 285 // Load data from memory to the surface
544 GLint x0 = static_cast<GLint>(rect.left); 286 GLint x0 = static_cast<GLint>(rect.left);
545 GLint y0 = static_cast<GLint>(rect.bottom); 287 GLint y0 = static_cast<GLint>(rect.bottom);
546 size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); 288 size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format);
547 289
548 const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); 290 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
549 GLuint target_tex = texture.handle; 291 GLuint target_tex = texture.handle;
550
551 // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
552 // surface
553 OGLTexture unscaled_tex;
554 if (res_scale != 1) {
555 x0 = 0;
556 y0 = 0;
557
558 unscaled_tex.Create();
559 AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight());
560 target_tex = unscaled_tex.handle;
561 }
562
563 OpenGLState cur_state = OpenGLState::GetCurState(); 292 OpenGLState cur_state = OpenGLState::GetCurState();
564 293
565 GLuint old_tex = cur_state.texture_units[0].texture_2d; 294 GLuint old_tex = cur_state.texture_units[0].texture_2d;
@@ -567,15 +296,15 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
567 cur_state.Apply(); 296 cur_state.Apply();
568 297
569 // Ensure no bad interactions with GL_UNPACK_ALIGNMENT 298 // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
570 ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); 299 ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0);
571 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride)); 300 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width));
572 301
573 glActiveTexture(GL_TEXTURE0); 302 glActiveTexture(GL_TEXTURE0);
574 if (tuple.compressed) { 303 if (tuple.compressed) {
575 glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, 304 glCompressedTexImage2D(
576 static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()), 305 GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
577 static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0, 306 static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes),
578 static_cast<GLsizei>(size), &gl_buffer[buffer_offset]); 307 &gl_buffer[buffer_offset]);
579 } else { 308 } else {
580 glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), 309 glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
581 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 310 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
@@ -586,827 +315,238 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
586 315
587 cur_state.texture_units[0].texture_2d = old_tex; 316 cur_state.texture_units[0].texture_2d = old_tex;
588 cur_state.Apply(); 317 cur_state.Apply();
589
590 if (res_scale != 1) {
591 auto scaled_rect = rect;
592 scaled_rect.left *= res_scale;
593 scaled_rect.top *= res_scale;
594 scaled_rect.right *= res_scale;
595 scaled_rect.bottom *= res_scale;
596
597 BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle,
598 scaled_rect, type, read_fb_handle, draw_fb_handle);
599 }
600} 318}
601 319
602MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); 320MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
603void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, 321void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
604 GLuint draw_fb_handle) { 322 if (params.type == SurfaceType::Fill)
605 if (type == SurfaceType::Fill)
606 return; 323 return;
607 324
608 MICROPROFILE_SCOPE(OpenGL_TextureDL); 325 MICROPROFILE_SCOPE(OpenGL_TextureDL);
609 326
610 if (gl_buffer == nullptr) { 327 gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
611 gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format);
612 gl_buffer.reset(new u8[gl_buffer_size]);
613 }
614 328
615 OpenGLState state = OpenGLState::GetCurState(); 329 OpenGLState state = OpenGLState::GetCurState();
616 OpenGLState prev_state = state; 330 OpenGLState prev_state = state;
617 SCOPE_EXIT({ prev_state.Apply(); }); 331 SCOPE_EXIT({ prev_state.Apply(); });
618 332
619 const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); 333 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
620 334
621 // Ensure no bad interactions with GL_PACK_ALIGNMENT 335 // Ensure no bad interactions with GL_PACK_ALIGNMENT
622 ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); 336 ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0);
623 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride)); 337 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
624 size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format);
625
626 // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
627 if (res_scale != 1) {
628 auto scaled_rect = rect;
629 scaled_rect.left *= res_scale;
630 scaled_rect.top *= res_scale;
631 scaled_rect.right *= res_scale;
632 scaled_rect.bottom *= res_scale;
633
634 OGLTexture unscaled_tex;
635 unscaled_tex.Create();
636
637 MathUtil::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0};
638 AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight());
639 BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type,
640 read_fb_handle, draw_fb_handle);
641
642 state.texture_units[0].texture_2d = unscaled_tex.handle;
643 state.Apply();
644
645 glActiveTexture(GL_TEXTURE0);
646 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]);
647 } else {
648 state.UnbindTexture(texture.handle);
649 state.draw.read_framebuffer = read_fb_handle;
650 state.Apply();
651
652 if (type == SurfaceType::ColorTexture) {
653 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
654 texture.handle, 0);
655 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
656 0, 0);
657 } else if (type == SurfaceType::Depth) {
658 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
659 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
660 texture.handle, 0);
661 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
662 } else {
663 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
664 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
665 texture.handle, 0);
666 }
667 glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
668 static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
669 tuple.format, tuple.type, &gl_buffer[buffer_offset]);
670 }
671 338
672 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 339 const auto& rect{params.GetRect()};
673} 340 size_t buffer_offset =
674 341 (rect.bottom * params.width + rect.left) * GetGLBytesPerPixel(params.pixel_format);
675enum class MatchFlags {
676 None = 0,
677 Invalid = 1, // Flag that can be applied to other match types, invalid matches require
678 // validation before they can be used
679 Exact = 1 << 1, // Surfaces perfectly match
680 SubRect = 1 << 2, // Surface encompasses params
681 Copy = 1 << 3, // Surface we can copy from
682 Expand = 1 << 4, // Surface that can expand params
683 TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters
684};
685
686constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) {
687 return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs));
688}
689 342
690constexpr MatchFlags operator&(MatchFlags lhs, MatchFlags rhs) { 343 state.UnbindTexture(texture.handle);
691 return static_cast<MatchFlags>(static_cast<int>(lhs) & static_cast<int>(rhs)); 344 state.draw.read_framebuffer = read_fb_handle;
692} 345 state.Apply();
693 346
694/// Get the best surface match (and its match type) for the given flags 347 if (params.type == SurfaceType::ColorTexture) {
695template <MatchFlags find_flags> 348 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
696Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, 349 texture.handle, 0);
697 ScaleMatch match_scale_type, 350 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
698 boost::optional<SurfaceInterval> validate_interval = boost::none) { 351 0);
699 Surface match_surface = nullptr; 352 } else if (params.type == SurfaceType::Depth) {
700 bool match_valid = false; 353 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
701 u32 match_scale = 0; 354 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
702 SurfaceInterval match_interval{}; 355 texture.handle, 0);
703 356 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
704 for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { 357 } else {
705 for (auto& surface : pair.second) { 358 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
706 bool res_scale_matched = match_scale_type == ScaleMatch::Exact 359 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
707 ? (params.res_scale == surface->res_scale) 360 texture.handle, 0);
708 : (params.res_scale <= surface->res_scale);
709 // validity will be checked in GetCopyableInterval
710 bool is_valid =
711 (find_flags & MatchFlags::Copy) != MatchFlags::None
712 ? true
713 : surface->IsRegionValid(validate_interval.value_or(params.GetInterval()));
714
715 if ((find_flags & MatchFlags::Invalid) == MatchFlags::None && !is_valid)
716 continue;
717
718 auto IsMatch_Helper = [&](auto check_type, auto match_fn) {
719 if ((find_flags & check_type) == MatchFlags::None)
720 return;
721
722 bool matched;
723 SurfaceInterval surface_interval;
724 std::tie(matched, surface_interval) = match_fn();
725 if (!matched)
726 return;
727
728 if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore &&
729 surface->type != SurfaceType::Fill)
730 return;
731
732 // Found a match, update only if this is better than the previous one
733 auto UpdateMatch = [&] {
734 match_surface = surface;
735 match_valid = is_valid;
736 match_scale = surface->res_scale;
737 match_interval = surface_interval;
738 };
739
740 if (surface->res_scale > match_scale) {
741 UpdateMatch();
742 return;
743 } else if (surface->res_scale < match_scale) {
744 return;
745 }
746
747 if (is_valid && !match_valid) {
748 UpdateMatch();
749 return;
750 } else if (is_valid != match_valid) {
751 return;
752 }
753
754 if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) {
755 UpdateMatch();
756 }
757 };
758 IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] {
759 return std::make_pair(surface->ExactMatch(params), surface->GetInterval());
760 });
761 IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] {
762 return std::make_pair(surface->CanSubRect(params), surface->GetInterval());
763 });
764 IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] {
765 auto copy_interval =
766 params.FromInterval(*validate_interval).GetCopyableInterval(surface);
767 bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 &&
768 surface->CanCopy(params, copy_interval);
769 return std::make_pair(matched, copy_interval);
770 });
771 IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] {
772 return std::make_pair(surface->CanExpand(params), surface->GetInterval());
773 });
774 IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] {
775 return std::make_pair(surface->CanTexCopy(params), surface->GetInterval());
776 });
777 }
778 } 361 }
779 return match_surface; 362 glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
363 static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
364 tuple.format, tuple.type, &gl_buffer[buffer_offset]);
365
366 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
780} 367}
781 368
782RasterizerCacheOpenGL::RasterizerCacheOpenGL() { 369RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
783 read_framebuffer.Create(); 370 read_framebuffer.Create();
784 draw_framebuffer.Create(); 371 draw_framebuffer.Create();
785
786 attributeless_vao.Create();
787
788 d24s8_abgr_buffer.Create();
789 d24s8_abgr_buffer_size = 0;
790
791 const char* vs_source = R"(
792#version 330 core
793const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
794void main() {
795 gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
796}
797)";
798 const char* fs_source = R"(
799#version 330 core
800
801uniform samplerBuffer tbo;
802uniform vec2 tbo_size;
803uniform vec4 viewport;
804
805out vec4 color;
806
807void main() {
808 vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw;
809 int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x);
810 color = texelFetch(tbo, tbo_offset).rabg;
811}
812)";
813 d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source);
814
815 OpenGLState state = OpenGLState::GetCurState();
816 GLuint old_program = state.draw.shader_program;
817 state.draw.shader_program = d24s8_abgr_shader.handle;
818 state.Apply();
819
820 GLint tbo_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo");
821 ASSERT(tbo_u_id != -1);
822 glUniform1i(tbo_u_id, 0);
823
824 state.draw.shader_program = old_program;
825 state.Apply();
826
827 d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size");
828 ASSERT(d24s8_abgr_tbo_size_u_id != -1);
829 d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport");
830 ASSERT(d24s8_abgr_viewport_u_id != -1);
831} 372}
832 373
833RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { 374RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
834 FlushAll(); 375 while (!surface_cache.empty()) {
835 while (!surface_cache.empty()) 376 UnregisterSurface(surface_cache.begin()->second);
836 UnregisterSurface(*surface_cache.begin()->second.begin());
837}
838
839bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface,
840 const MathUtil::Rectangle<u32>& src_rect,
841 const Surface& dst_surface,
842 const MathUtil::Rectangle<u32>& dst_rect) {
843 if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format))
844 return false;
845
846 return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle,
847 dst_rect, src_surface->type, read_framebuffer.handle,
848 draw_framebuffer.handle);
849}
850
851void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex,
852 const MathUtil::Rectangle<u32>& src_rect,
853 GLuint dst_tex,
854 const MathUtil::Rectangle<u32>& dst_rect) {
855 OpenGLState prev_state = OpenGLState::GetCurState();
856 SCOPE_EXIT({ prev_state.Apply(); });
857
858 OpenGLState state;
859 state.draw.read_framebuffer = read_framebuffer.handle;
860 state.draw.draw_framebuffer = draw_framebuffer.handle;
861 state.Apply();
862
863 glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle);
864
865 GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4;
866 if (target_pbo_size > d24s8_abgr_buffer_size) {
867 d24s8_abgr_buffer_size = target_pbo_size * 2;
868 glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY);
869 }
870
871 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
872 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex,
873 0);
874 glReadPixels(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.bottom),
875 static_cast<GLsizei>(src_rect.GetWidth()),
876 static_cast<GLsizei>(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
877 0);
878
879 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
880
881 // PBO now contains src_tex in RABG format
882 state.draw.shader_program = d24s8_abgr_shader.handle;
883 state.draw.vertex_array = attributeless_vao.handle;
884 state.viewport.x = static_cast<GLint>(dst_rect.left);
885 state.viewport.y = static_cast<GLint>(dst_rect.bottom);
886 state.viewport.width = static_cast<GLsizei>(dst_rect.GetWidth());
887 state.viewport.height = static_cast<GLsizei>(dst_rect.GetHeight());
888 state.Apply();
889
890 OGLTexture tbo;
891 tbo.Create();
892 glActiveTexture(GL_TEXTURE0);
893 glBindTexture(GL_TEXTURE_BUFFER, tbo.handle);
894 glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle);
895
896 glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast<GLfloat>(src_rect.GetWidth()),
897 static_cast<GLfloat>(src_rect.GetHeight()));
898 glUniform4f(d24s8_abgr_viewport_u_id, static_cast<GLfloat>(state.viewport.x),
899 static_cast<GLfloat>(state.viewport.y), static_cast<GLfloat>(state.viewport.width),
900 static_cast<GLfloat>(state.viewport.height));
901
902 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0);
903 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
904 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
905
906 glBindTexture(GL_TEXTURE_BUFFER, 0);
907}
908
909Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
910 bool load_if_create) {
911 if (params.addr == 0 || params.height * params.width == 0) {
912 return nullptr;
913 }
914 // Use GetSurfaceSubRect instead
915 ASSERT(params.width == params.stride);
916
917 // Check for an exact match in existing surfaces
918 Surface surface =
919 FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale);
920
921 if (surface == nullptr) {
922 u16 target_res_scale = params.res_scale;
923 if (match_res_scale != ScaleMatch::Exact) {
924 // This surface may have a subrect of another surface with a higher res_scale, find it
925 // to adjust our params
926 SurfaceParams find_params = params;
927 Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(
928 surface_cache, find_params, match_res_scale);
929 if (expandable != nullptr && expandable->res_scale > target_res_scale) {
930 target_res_scale = expandable->res_scale;
931 }
932 }
933 SurfaceParams new_params = params;
934 new_params.res_scale = target_res_scale;
935 surface = CreateSurface(new_params);
936 RegisterSurface(surface);
937 } 377 }
938
939 if (load_if_create) {
940 ValidateSurface(surface, params.addr, params.size);
941 }
942
943 return surface;
944}
945
946boost::optional<Tegra::GPUVAddr> RasterizerCacheOpenGL::TryFindFramebufferGpuAddress(
947 VAddr cpu_addr) const {
948 // Tries to find the GPU address of a framebuffer based on the CPU address. This is because
949 // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU
950 // addresses. We iterate through all cached framebuffers, and compare their starting CPU address
951 // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps
952 // surfaces.
953
954 std::vector<Tegra::GPUVAddr> gpu_addresses;
955 for (const auto& pair : surface_cache) {
956 for (const auto& surface : pair.second) {
957 const VAddr surface_cpu_addr = surface->GetCpuAddr();
958 if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + surface->size)) {
959 ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported");
960 gpu_addresses.push_back(surface->addr);
961 }
962 }
963 }
964
965 if (gpu_addresses.empty()) {
966 return {};
967 }
968
969 ASSERT_MSG(gpu_addresses.size() == 1, ">1 surface is unsupported");
970 return gpu_addresses[0];
971}
972
973SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params,
974 ScaleMatch match_res_scale,
975 bool load_if_create) {
976 if (params.addr == 0 || params.height * params.width == 0) {
977 return std::make_tuple(nullptr, MathUtil::Rectangle<u32>{});
978 }
979
980 // Attempt to find encompassing surface
981 Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
982 match_res_scale);
983
984 // Check if FindMatch failed because of res scaling
985 // If that's the case create a new surface with
986 // the dimensions of the lower res_scale surface
987 // to suggest it should not be used again
988 if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) {
989 surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
990 ScaleMatch::Ignore);
991 if (surface != nullptr) {
992 ASSERT(surface->res_scale < params.res_scale);
993 SurfaceParams new_params = *surface;
994 new_params.res_scale = params.res_scale;
995
996 surface = CreateSurface(new_params);
997 RegisterSurface(surface);
998 }
999 }
1000
1001 SurfaceParams aligned_params = params;
1002 if (params.is_tiled) {
1003 aligned_params.height = Common::AlignUp(params.height, 8);
1004 aligned_params.width = Common::AlignUp(params.width, 8);
1005 aligned_params.stride = Common::AlignUp(params.stride, 8);
1006 aligned_params.UpdateParams();
1007 }
1008
1009 // Check for a surface we can expand before creating a new one
1010 if (surface == nullptr) {
1011 surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params,
1012 match_res_scale);
1013 if (surface != nullptr) {
1014 aligned_params.width = aligned_params.stride;
1015 aligned_params.UpdateParams();
1016
1017 SurfaceParams new_params = *surface;
1018 new_params.addr = std::min(aligned_params.addr, surface->addr);
1019 new_params.end = std::max(aligned_params.end, surface->end);
1020 new_params.size = new_params.end - new_params.addr;
1021 new_params.height = static_cast<u32>(
1022 new_params.size / aligned_params.BytesInPixels(aligned_params.stride));
1023 ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0);
1024
1025 Surface new_surface = CreateSurface(new_params);
1026 DuplicateSurface(surface, new_surface);
1027
1028 // Delete the expanded surface, this can't be done safely yet
1029 // because it may still be in use
1030 remove_surfaces.emplace(surface);
1031
1032 surface = new_surface;
1033 RegisterSurface(new_surface);
1034 }
1035 }
1036
1037 // No subrect found - create and return a new surface
1038 if (surface == nullptr) {
1039 SurfaceParams new_params = aligned_params;
1040 // Can't have gaps in a surface
1041 new_params.width = aligned_params.stride;
1042 new_params.UpdateParams();
1043 // GetSurface will create the new surface and possibly adjust res_scale if necessary
1044 surface = GetSurface(new_params, match_res_scale, load_if_create);
1045 } else if (load_if_create) {
1046 ValidateSurface(surface, aligned_params.addr, aligned_params.size);
1047 }
1048
1049 return std::make_tuple(surface, surface->GetScaledSubRect(params));
1050} 378}
1051 379
1052Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { 380Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
1053 auto& gpu = Core::System::GetInstance().GPU(); 381 return GetSurface(SurfaceParams::CreateForTexture(config));
1054
1055 SurfaceParams params;
1056 params.addr = config.tic.Address();
1057 params.is_tiled = config.tic.IsTiled();
1058 params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
1059
1060 params.width = Common::AlignUp(config.tic.Width(), params.GetCompresssionFactor()) /
1061 params.GetCompresssionFactor();
1062 params.height = Common::AlignUp(config.tic.Height(), params.GetCompresssionFactor()) /
1063 params.GetCompresssionFactor();
1064
1065 // TODO(Subv): Different types per component are not supported.
1066 ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() &&
1067 config.tic.r_type.Value() == config.tic.b_type.Value() &&
1068 config.tic.r_type.Value() == config.tic.a_type.Value());
1069
1070 params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value());
1071
1072 if (config.tic.IsTiled()) {
1073 params.block_height = config.tic.BlockHeight();
1074
1075 // TODO(bunnei): The below align up is a hack. This is here because some compressed textures
1076 // are not a multiple of their own compression factor, and so this accounts for that. This
1077 // could potentially result in an extra row of 4px being decoded if a texture is not a
1078 // multiple of 4.
1079 params.width = Common::AlignUp(params.width, 4);
1080 params.height = Common::AlignUp(params.height, 4);
1081 } else {
1082 // Use the texture-provided stride value if the texture isn't tiled.
1083 params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch()));
1084 }
1085
1086 params.UpdateParams();
1087
1088 return GetSurface(params, ScaleMatch::Ignore, true);
1089} 382}
1090 383
1091SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( 384SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
1092 bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { 385 bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) {
1093 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; 386 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
1094 const auto& config = regs.rt[0];
1095 387
1096 // TODO(bunnei): This is hard-coded to use just the first render buffer 388 // TODO(bunnei): This is hard-coded to use just the first render buffer
1097 NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); 389 NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!");
1098 390
1099 // update resolution_scale_factor and reset cache if changed
1100 // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We
1101 // need to fix this before making the renderer multi-threaded.
1102 static u16 resolution_scale_factor = GetResolutionScaleFactor();
1103 if (resolution_scale_factor != GetResolutionScaleFactor()) {
1104 resolution_scale_factor = GetResolutionScaleFactor();
1105 FlushAll();
1106 while (!surface_cache.empty())
1107 UnregisterSurface(*surface_cache.begin()->second.begin());
1108 }
1109
1110 MathUtil::Rectangle<u32> viewport_clamped{
1111 static_cast<u32>(std::clamp(viewport.left, 0, static_cast<s32>(config.width))),
1112 static_cast<u32>(std::clamp(viewport.top, 0, static_cast<s32>(config.height))),
1113 static_cast<u32>(std::clamp(viewport.right, 0, static_cast<s32>(config.width))),
1114 static_cast<u32>(std::clamp(viewport.bottom, 0, static_cast<s32>(config.height)))};
1115
1116 // get color and depth surfaces 391 // get color and depth surfaces
1117 SurfaceParams color_params; 392 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(regs.rt[0])};
1118 color_params.is_tiled = true; 393 const SurfaceParams depth_params{color_params};
1119 color_params.res_scale = resolution_scale_factor;
1120 color_params.width = config.width;
1121 color_params.height = config.height;
1122 // TODO(Subv): Can framebuffers use a different block height?
1123 color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
1124 SurfaceParams depth_params = color_params;
1125
1126 color_params.addr = config.Address();
1127 color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format);
1128 color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format);
1129 color_params.UpdateParams();
1130 394
1131 ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); 395 ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented");
1132 // depth_params.addr = config.GetDepthBufferPhysicalAddress();
1133 // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format);
1134 // depth_params.UpdateParams();
1135
1136 auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped);
1137 auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped);
1138
1139 // Make sure that framebuffers don't overlap if both color and depth are being used
1140 if (using_color_fb && using_depth_fb &&
1141 boost::icl::length(color_vp_interval & depth_vp_interval)) {
1142 NGLOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
1143 "overlapping framebuffers not supported!");
1144 using_depth_fb = false;
1145 }
1146 396
1147 MathUtil::Rectangle<u32> color_rect{}; 397 MathUtil::Rectangle<u32> color_rect{};
1148 Surface color_surface = nullptr; 398 Surface color_surface;
1149 if (using_color_fb) 399 if (using_color_fb) {
1150 std::tie(color_surface, color_rect) = 400 color_surface = GetSurface(color_params);
1151 GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); 401 if (color_surface) {
402 color_rect = color_surface->GetSurfaceParams().GetRect();
403 }
404 }
1152 405
1153 MathUtil::Rectangle<u32> depth_rect{}; 406 MathUtil::Rectangle<u32> depth_rect{};
1154 Surface depth_surface = nullptr; 407 Surface depth_surface;
1155 if (using_depth_fb) 408 if (using_depth_fb) {
1156 std::tie(depth_surface, depth_rect) = 409 depth_surface = GetSurface(depth_params);
1157 GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); 410 if (depth_surface) {
411 depth_rect = depth_surface->GetSurfaceParams().GetRect();
412 }
413 }
1158 414
1159 MathUtil::Rectangle<u32> fb_rect{}; 415 MathUtil::Rectangle<u32> fb_rect{};
1160 if (color_surface != nullptr && depth_surface != nullptr) { 416 if (color_surface && depth_surface) {
1161 fb_rect = color_rect; 417 fb_rect = color_rect;
1162 // Color and Depth surfaces must have the same dimensions and offsets 418 // Color and Depth surfaces must have the same dimensions and offsets
1163 if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || 419 if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top ||
1164 color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { 420 color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) {
1165 color_surface = GetSurface(color_params, ScaleMatch::Exact, false); 421 color_surface = GetSurface(color_params);
1166 depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); 422 depth_surface = GetSurface(depth_params);
1167 fb_rect = color_surface->GetScaledRect(); 423 fb_rect = color_surface->GetSurfaceParams().GetRect();
1168 } 424 }
1169 } else if (color_surface != nullptr) { 425 } else if (color_surface) {
1170 fb_rect = color_rect; 426 fb_rect = color_rect;
1171 } else if (depth_surface != nullptr) { 427 } else if (depth_surface) {
1172 fb_rect = depth_rect; 428 fb_rect = depth_rect;
1173 } 429 }
1174 430
1175 if (color_surface != nullptr) {
1176 ValidateSurface(color_surface, boost::icl::first(color_vp_interval),
1177 boost::icl::length(color_vp_interval));
1178 }
1179 if (depth_surface != nullptr) {
1180 ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval),
1181 boost::icl::length(depth_vp_interval));
1182 }
1183
1184 return std::make_tuple(color_surface, depth_surface, fb_rect); 431 return std::make_tuple(color_surface, depth_surface, fb_rect);
1185} 432}
1186 433
1187Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { 434void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
1188 UNREACHABLE(); 435 surface->LoadGLBuffer();
1189 return {}; 436 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
1190} 437}
1191 438
1192SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { 439void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
1193 MathUtil::Rectangle<u32> rect{}; 440 if (Settings::values.use_accurate_framebuffers) {
1194 441 // If enabled, always flush dirty surfaces
1195 Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>( 442 surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
1196 surface_cache, params, ScaleMatch::Ignore); 443 surface->FlushGLBuffer();
1197 444 } else {
1198 if (match_surface != nullptr) { 445 // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads
1199 ValidateSurface(match_surface, params.addr, params.size); 446 // and flushes are very slow and do not seem to improve accuracy
1200 447 const auto& params{surface->GetSurfaceParams()};
1201 SurfaceParams match_subrect; 448 Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);
1202 if (params.width != params.stride) {
1203 const u32 tiled_size = match_surface->is_tiled ? 8 : 1;
1204 match_subrect = params;
1205 match_subrect.width =
1206 static_cast<u32>(match_surface->PixelsInBytes(params.width) / tiled_size);
1207 match_subrect.stride =
1208 static_cast<u32>(match_surface->PixelsInBytes(params.stride) / tiled_size);
1209 match_subrect.height *= tiled_size;
1210 } else {
1211 match_subrect = match_surface->FromInterval(params.GetInterval());
1212 ASSERT(match_subrect.GetInterval() == params.GetInterval());
1213 }
1214
1215 rect = match_surface->GetScaledSubRect(match_subrect);
1216 } 449 }
1217
1218 return std::make_tuple(match_surface, rect);
1219} 450}
1220 451
1221void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, 452Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
1222 const Surface& dest_surface) { 453 if (params.addr == 0 || params.height * params.width == 0) {
1223 ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); 454 return {};
1224 455 }
1225 BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface,
1226 dest_surface->GetScaledSubRect(*src_surface));
1227
1228 dest_surface->invalid_regions -= src_surface->GetInterval();
1229 dest_surface->invalid_regions += src_surface->invalid_regions;
1230 456
1231 SurfaceRegions regions; 457 // Check for an exact match in existing surfaces
1232 for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { 458 const auto& surface_key{SurfaceKey::Create(params)};
1233 if (pair.second == src_surface) { 459 const auto& search{surface_cache.find(surface_key)};
1234 regions += pair.first; 460 Surface surface;
461 if (search != surface_cache.end()) {
462 surface = search->second;
463 if (Settings::values.use_accurate_framebuffers) {
464 // Reload the surface from Switch memory
465 LoadSurface(surface);
1235 } 466 }
467 } else {
468 surface = std::make_shared<CachedSurface>(params);
469 RegisterSurface(surface);
470 LoadSurface(surface);
1236 } 471 }
1237 for (auto& interval : regions) {
1238 dirty_regions.set({interval, dest_surface});
1239 }
1240}
1241
1242void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr,
1243 u64 size) {
1244 if (size == 0)
1245 return;
1246
1247 const SurfaceInterval validate_interval(addr, addr + size);
1248 472
1249 if (surface->type == SurfaceType::Fill) { 473 return surface;
1250 // Sanity check, fill surfaces will always be valid when used 474}
1251 ASSERT(surface->IsRegionValid(validate_interval));
1252 return;
1253 }
1254 475
1255 while (true) { 476Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
1256 const auto it = surface->invalid_regions.find(validate_interval); 477 // Tries to find the GPU address of a framebuffer based on the CPU address. This is because
1257 if (it == surface->invalid_regions.end()) 478 // final output framebuffers are specified by CPU address, but internally our GPU cache uses
1258 break; 479 // GPU addresses. We iterate through all cached framebuffers, and compare their starting CPU
1259 480 // address to the one provided. This is obviously not great, and won't work if the
1260 const auto interval = *it & validate_interval; 481 // framebuffer overlaps surfaces.
1261 // Look for a valid surface to copy from 482
1262 SurfaceParams params = *surface; 483 std::vector<Surface> surfaces;
1263 484 for (const auto& surface : surface_cache) {
1264 Surface copy_surface = 485 const auto& params = surface.second->GetSurfaceParams();
1265 FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval); 486 const VAddr surface_cpu_addr = params.GetCpuAddr();
1266 if (copy_surface != nullptr) { 487 if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) {
1267 SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); 488 ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported");
1268 CopySurface(copy_surface, surface, copy_interval); 489 surfaces.push_back(surface.second);
1269 surface->invalid_regions.erase(copy_interval);
1270 continue;
1271 } 490 }
1272
1273 // Load data from Switch memory
1274 FlushRegion(params.addr, params.size);
1275 surface->LoadGLBuffer(params.addr, params.end);
1276 surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle,
1277 draw_framebuffer.handle);
1278 surface->invalid_regions.erase(params.GetInterval());
1279 } 491 }
1280}
1281 492
1282void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) { 493 if (surfaces.empty()) {
1283 if (size == 0) 494 return {};
1284 return; 495 }
1285 496
1286 const SurfaceInterval flush_interval(addr, addr + size); 497 ASSERT_MSG(surfaces.size() == 1, ">1 surface is unsupported");
1287 SurfaceRegions flushed_intervals;
1288 498
1289 for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { 499 return surfaces[0];
1290 // small sizes imply that this most likely comes from the cpu, flush the entire region 500}
1291 // the point is to avoid thousands of small writes every frame if the cpu decides to access
1292 // that region, anything higher than 8 you're guaranteed it comes from a service
1293 const auto interval = size <= 8 ? pair.first : pair.first & flush_interval;
1294 auto& surface = pair.second;
1295 501
1296 if (flush_surface != nullptr && surface != flush_surface) 502void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) {
1297 continue; 503 // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should
504 // probably implement this in the future, but for now, the `use_accurate_framebuffers` setting
505 // can be used to always flush.
506}
1298 507
1299 // Sanity check, this surface is the last one that marked this region dirty 508void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) {
1300 ASSERT(surface->IsRegionValid(interval)); 509 for (const auto& pair : surface_cache) {
510 const auto& surface{pair.second};
511 const auto& params{surface->GetSurfaceParams()};
1301 512
1302 if (surface->type != SurfaceType::Fill) { 513 if (params.IsOverlappingRegion(addr, size)) {
1303 SurfaceParams params = surface->FromInterval(interval); 514 UnregisterSurface(surface);
1304 surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle,
1305 draw_framebuffer.handle);
1306 } 515 }
1307 surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval));
1308 flushed_intervals += interval;
1309 } 516 }
1310 // Reset dirty regions
1311 dirty_regions -= flushed_intervals;
1312} 517}
1313 518
1314void RasterizerCacheOpenGL::FlushAll() { 519void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
1315 FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); 520 const auto& params{surface->GetSurfaceParams()};
1316} 521 const auto& surface_key{SurfaceKey::Create(params)};
522 const auto& search{surface_cache.find(surface_key)};
1317 523
1318void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size, 524 if (search != surface_cache.end()) {
1319 const Surface& region_owner) { 525 // Registered already
1320 if (size == 0)
1321 return; 526 return;
1322
1323 const SurfaceInterval invalid_interval(addr, addr + size);
1324
1325 if (region_owner != nullptr) {
1326 ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end);
1327 // Surfaces can't have a gap
1328 ASSERT(region_owner->width == region_owner->stride);
1329 region_owner->invalid_regions.erase(invalid_interval);
1330 } 527 }
1331 528
1332 for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { 529 surface_cache[surface_key] = surface;
1333 for (auto& cached_surface : pair.second) { 530 UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
1334 if (cached_surface == region_owner)
1335 continue;
1336
1337 // If cpu is invalidating this region we want to remove it
1338 // to (likely) mark the memory pages as uncached
1339 if (region_owner == nullptr && size <= 8) {
1340 FlushRegion(cached_surface->addr, cached_surface->size, cached_surface);
1341 remove_surfaces.emplace(cached_surface);
1342 continue;
1343 }
1344
1345 const auto interval = cached_surface->GetInterval() & invalid_interval;
1346 cached_surface->invalid_regions.insert(interval);
1347
1348 // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures
1349 if (cached_surface->type == SurfaceType::Fill &&
1350 cached_surface->IsSurfaceFullyInvalid()) {
1351 remove_surfaces.emplace(cached_surface);
1352 }
1353 }
1354 }
1355
1356 if (region_owner != nullptr)
1357 dirty_regions.set({invalid_interval, region_owner});
1358 else
1359 dirty_regions.erase(invalid_interval);
1360
1361 for (auto& remove_surface : remove_surfaces) {
1362 if (remove_surface == region_owner) {
1363 Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(
1364 surface_cache, *region_owner, ScaleMatch::Ignore);
1365 ASSERT(expanded_surface);
1366
1367 if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) {
1368 DuplicateSurface(region_owner, expanded_surface);
1369 } else {
1370 continue;
1371 }
1372 }
1373 UnregisterSurface(remove_surface);
1374 }
1375
1376 remove_surfaces.clear();
1377} 531}
1378 532
1379Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { 533void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
1380 Surface surface = std::make_shared<CachedSurface>(); 534 const auto& params{surface->GetSurfaceParams()};
1381 static_cast<SurfaceParams&>(*surface) = params; 535 const auto& surface_key{SurfaceKey::Create(params)};
1382 536 const auto& search{surface_cache.find(surface_key)};
1383 surface->texture.Create();
1384
1385 surface->gl_buffer_size = 0;
1386 surface->invalid_regions.insert(surface->GetInterval());
1387 AllocateSurfaceTexture(surface->texture.handle,
1388 GetFormatTuple(surface->pixel_format, surface->component_type),
1389 surface->GetScaledWidth(), surface->GetScaledHeight());
1390
1391 return surface;
1392}
1393 537
1394void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { 538 if (search == surface_cache.end()) {
1395 if (surface->registered) { 539 // Unregistered already
1396 return; 540 return;
1397 } 541 }
1398 surface->registered = true; 542
1399 surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); 543 UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1);
1400 UpdatePagesCachedCount(surface->addr, surface->size, 1); 544 surface_cache.erase(search);
1401} 545}
1402 546
1403void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { 547template <typename Map, typename Interval>
1404 if (!surface->registered) { 548constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
1405 return; 549 return boost::make_iterator_range(map.equal_range(interval));
1406 }
1407 surface->registered = false;
1408 UpdatePagesCachedCount(surface->addr, surface->size, -1);
1409 surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}});
1410} 550}
1411 551
1412void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { 552void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 9da945e19..85e7c8888 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -1,57 +1,26 @@
1// Copyright 2015 Citra Emulator Project 1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <map>
8#include <memory> 9#include <memory>
9#include <set> 10#include <vector>
10#include <tuple>
11#ifdef __GNUC__
12#pragma GCC diagnostic push
13#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
14#endif
15#include <boost/icl/interval_map.hpp> 11#include <boost/icl/interval_map.hpp>
16#include <boost/icl/interval_set.hpp>
17#ifdef __GNUC__
18#pragma GCC diagnostic pop
19#endif
20#include <boost/optional.hpp>
21#include <glad/glad.h>
22#include "common/assert.h"
23#include "common/common_funcs.h"
24#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/hash.h"
25#include "common/math_util.h" 14#include "common/math_util.h"
26#include "video_core/gpu.h" 15#include "video_core/engines/maxwell_3d.h"
27#include "video_core/memory_manager.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h" 16#include "video_core/renderer_opengl/gl_resource_manager.h"
29#include "video_core/textures/texture.h" 17#include "video_core/textures/texture.h"
30 18
31struct CachedSurface; 19class CachedSurface;
32using Surface = std::shared_ptr<CachedSurface>; 20using Surface = std::shared_ptr<CachedSurface>;
33using SurfaceSet = std::set<Surface>;
34
35using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>;
36using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>;
37using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>;
38
39using SurfaceInterval = SurfaceCache::interval_type;
40static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() &&
41 std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(),
42 "incorrect interval types");
43
44using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>;
45using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; 21using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
46
47using PageMap = boost::icl::interval_map<u64, int>; 22using PageMap = boost::icl::interval_map<u64, int>;
48 23
49enum class ScaleMatch {
50 Exact, // only accept same res scale
51 Upscale, // only allow higher scale than params
52 Ignore // accept every scaled res
53};
54
55struct SurfaceParams { 24struct SurfaceParams {
56 enum class PixelFormat { 25 enum class PixelFormat {
57 ABGR8 = 0, 26 ABGR8 = 0,
@@ -93,10 +62,10 @@ struct SurfaceParams {
93 /** 62 /**
94 * Gets the compression factor for the specified PixelFormat. This applies to just the 63 * Gets the compression factor for the specified PixelFormat. This applies to just the
95 * "compressed width" and "compressed height", not the overall compression factor of a 64 * "compressed width" and "compressed height", not the overall compression factor of a
96 * compressed image. This is used for maintaining proper surface sizes for compressed texture 65 * compressed image. This is used for maintaining proper surface sizes for compressed
97 * formats. 66 * texture formats.
98 */ 67 */
99 static constexpr u32 GetCompresssionFactor(PixelFormat format) { 68 static constexpr u32 GetCompressionFactor(PixelFormat format) {
100 if (format == PixelFormat::Invalid) 69 if (format == PixelFormat::Invalid)
101 return 0; 70 return 0;
102 71
@@ -112,15 +81,12 @@ struct SurfaceParams {
112 4, // DXT23 81 4, // DXT23
113 4, // DXT45 82 4, // DXT45
114 4, // DXN1 83 4, // DXN1
115 1, // ASTC_2D_4X4 84 4, // ASTC_2D_4X4
116 }}; 85 }};
117 86
118 ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); 87 ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
119 return compression_factor_table[static_cast<size_t>(format)]; 88 return compression_factor_table[static_cast<size_t>(format)];
120 } 89 }
121 u32 GetCompresssionFactor() const {
122 return GetCompresssionFactor(pixel_format);
123 }
124 90
125 static constexpr u32 GetFormatBpp(PixelFormat format) { 91 static constexpr u32 GetFormatBpp(PixelFormat format) {
126 if (format == PixelFormat::Invalid) 92 if (format == PixelFormat::Invalid)
@@ -165,25 +131,6 @@ struct SurfaceParams {
165 } 131 }
166 } 132 }
167 133
168 static bool IsFormatASTC(PixelFormat format) {
169 switch (format) {
170 case PixelFormat::ASTC_2D_4X4:
171 return true;
172 default:
173 return false;
174 }
175 }
176
177 static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
178 switch (format) {
179 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
180 return PixelFormat::ABGR8;
181 default:
182 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
183 UNREACHABLE();
184 }
185 }
186
187 static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format) { 134 static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format) {
188 // TODO(Subv): Properly implement this 135 // TODO(Subv): Properly implement this
189 switch (format) { 136 switch (format) {
@@ -276,36 +223,16 @@ struct SurfaceParams {
276 } 223 }
277 } 224 }
278 225
279 static ComponentType ComponentTypeFromGPUPixelFormat( 226 static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
280 Tegra::FramebufferConfig::PixelFormat format) {
281 switch (format) { 227 switch (format) {
282 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 228 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
283 return ComponentType::UNorm; 229 return PixelFormat::ABGR8;
284 default: 230 default:
285 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 231 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
286 UNREACHABLE(); 232 UNREACHABLE();
287 } 233 }
288 } 234 }
289 235
290 static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
291 SurfaceType a_type = GetFormatType(pixel_format_a);
292 SurfaceType b_type = GetFormatType(pixel_format_b);
293
294 if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) {
295 return true;
296 }
297
298 if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
299 return true;
300 }
301
302 if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
303 return true;
304 }
305
306 return false;
307 }
308
309 static SurfaceType GetFormatType(PixelFormat pixel_format) { 236 static SurfaceType GetFormatType(PixelFormat pixel_format) {
310 if (static_cast<size_t>(pixel_format) < MaxPixelFormat) { 237 if (static_cast<size_t>(pixel_format) < MaxPixelFormat) {
311 return SurfaceType::ColorTexture; 238 return SurfaceType::ColorTexture;
@@ -317,168 +244,101 @@ struct SurfaceParams {
317 return SurfaceType::Invalid; 244 return SurfaceType::Invalid;
318 } 245 }
319 246
320 /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" 247 /// Returns the rectangle corresponding to this surface
321 /// and "pixel_format" 248 MathUtil::Rectangle<u32> GetRect() const;
322 void UpdateParams() {
323 if (stride == 0) {
324 stride = width;
325 }
326 type = GetFormatType(pixel_format);
327 size = !is_tiled ? BytesInPixels(stride * (height - 1) + width)
328 : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8);
329 end = addr + size;
330 }
331
332 SurfaceInterval GetInterval() const {
333 return SurfaceInterval::right_open(addr, end);
334 }
335
336 // Returns the outer rectangle containing "interval"
337 SurfaceParams FromInterval(SurfaceInterval interval) const;
338
339 SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const;
340
341 // Returns the region of the biggest valid rectangle within interval
342 SurfaceInterval GetCopyableInterval(const Surface& src_surface) const;
343
344 /**
345 * Gets the actual width (in pixels) of the surface. This is provided because `width` is used
346 * for tracking the surface region in memory, which may be compressed for certain formats. In
347 * this scenario, `width` is actually the compressed width.
348 */
349 u32 GetActualWidth() const {
350 return width * GetCompresssionFactor();
351 }
352
353 /**
354 * Gets the actual height (in pixels) of the surface. This is provided because `height` is used
355 * for tracking the surface region in memory, which may be compressed for certain formats. In
356 * this scenario, `height` is actually the compressed height.
357 */
358 u32 GetActualHeight() const {
359 return height * GetCompresssionFactor();
360 }
361 249
362 u32 GetScaledWidth() const { 250 /// Returns the size of this surface in bytes, adjusted for compression
363 return width * res_scale; 251 size_t SizeInBytes() const {
252 const u32 compression_factor{GetCompressionFactor(pixel_format)};
253 ASSERT(width % compression_factor == 0);
254 ASSERT(height % compression_factor == 0);
255 return (width / compression_factor) * (height / compression_factor) *
256 GetFormatBpp(pixel_format) / CHAR_BIT;
364 } 257 }
365 258
366 u32 GetScaledHeight() const { 259 /// Returns the CPU virtual address for this surface
367 return height * res_scale; 260 VAddr GetCpuAddr() const;
368 }
369 261
370 MathUtil::Rectangle<u32> GetRect() const { 262 /// Returns true if the specified region overlaps with this surface's region in Switch memory
371 return {0, height, width, 0}; 263 bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const {
264 return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
372 } 265 }
373 266
374 MathUtil::Rectangle<u32> GetScaledRect() const { 267 /// Creates SurfaceParams from a texture configuration
375 return {0, GetScaledHeight(), GetScaledWidth(), 0}; 268 static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
376 } 269
270 /// Creates SurfaceParams from a framebuffer configuration
271 static SurfaceParams CreateForFramebuffer(
272 const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
273
274 Tegra::GPUVAddr addr;
275 bool is_tiled;
276 u32 block_height;
277 PixelFormat pixel_format;
278 ComponentType component_type;
279 SurfaceType type;
280 u32 width;
281 u32 height;
282 u32 unaligned_height;
283 size_t size_in_bytes;
284};
377 285
378 u64 PixelsInBytes(u64 size) const { 286/// Hashable variation of SurfaceParams, used for a key in the surface cache
379 return size * CHAR_BIT / GetFormatBpp(pixel_format); 287struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
288 static SurfaceKey Create(const SurfaceParams& params) {
289 SurfaceKey res;
290 res.state = params;
291 return res;
380 } 292 }
293};
381 294
382 u64 BytesInPixels(u64 pixels) const { 295namespace std {
383 return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; 296template <>
297struct hash<SurfaceKey> {
298 size_t operator()(const SurfaceKey& k) const {
299 return k.Hash();
384 } 300 }
385
386 VAddr GetCpuAddr() const;
387
388 bool ExactMatch(const SurfaceParams& other_surface) const;
389 bool CanSubRect(const SurfaceParams& sub_surface) const;
390 bool CanExpand(const SurfaceParams& expanded_surface) const;
391 bool CanTexCopy(const SurfaceParams& texcopy_params) const;
392
393 MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const;
394 MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const;
395
396 Tegra::GPUVAddr addr = 0;
397 Tegra::GPUVAddr end = 0;
398 boost::optional<VAddr> cpu_addr;
399 u64 size = 0;
400
401 u32 width = 0;
402 u32 height = 0;
403 u32 stride = 0;
404 u32 block_height = 0;
405 u16 res_scale = 1;
406
407 bool is_tiled = false;
408 PixelFormat pixel_format = PixelFormat::Invalid;
409 SurfaceType type = SurfaceType::Invalid;
410 ComponentType component_type = ComponentType::Invalid;
411}; 301};
302} // namespace std
412 303
413struct CachedSurface : SurfaceParams { 304class CachedSurface final {
414 bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; 305public:
415 bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; 306 CachedSurface(const SurfaceParams& params);
416
417 bool IsRegionValid(SurfaceInterval interval) const {
418 return (invalid_regions.find(interval) == invalid_regions.end());
419 }
420 307
421 bool IsSurfaceFullyInvalid() const { 308 const OGLTexture& Texture() const {
422 return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); 309 return texture;
423 } 310 }
424 311
425 bool registered = false; 312 static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) {
426 SurfaceRegions invalid_regions; 313 if (format == SurfaceParams::PixelFormat::Invalid)
427
428 u64 fill_size = 0; /// Number of bytes to read from fill_data
429 std::array<u8, 4> fill_data;
430
431 OGLTexture texture;
432
433 static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) {
434 if (format == PixelFormat::Invalid)
435 return 0; 314 return 0;
436 315
437 return SurfaceParams::GetFormatBpp(format) / CHAR_BIT; 316 return SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
438 } 317 }
439 318
440 std::unique_ptr<u8[]> gl_buffer; 319 const SurfaceParams& GetSurfaceParams() const {
441 size_t gl_buffer_size = 0; 320 return params;
321 }
442 322
443 // Read/Write data in Switch memory to/from gl_buffer 323 // Read/Write data in Switch memory to/from gl_buffer
444 void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end); 324 void LoadGLBuffer();
445 void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end); 325 void FlushGLBuffer();
446 326
447 // Upload/Download data in gl_buffer in/to this surface's texture 327 // Upload/Download data in gl_buffer in/to this surface's texture
448 void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, 328 void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
449 GLuint draw_fb_handle); 329 void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
450 void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, 330
451 GLuint draw_fb_handle); 331private:
332 OGLTexture texture;
333 std::vector<u8> gl_buffer;
334 SurfaceParams params;
452}; 335};
453 336
454class RasterizerCacheOpenGL : NonCopyable { 337class RasterizerCacheOpenGL final : NonCopyable {
455public: 338public:
456 RasterizerCacheOpenGL(); 339 RasterizerCacheOpenGL();
457 ~RasterizerCacheOpenGL(); 340 ~RasterizerCacheOpenGL();
458 341
459 /// Blit one surface's texture to another
460 bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect,
461 const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect);
462
463 void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect,
464 GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect);
465
466 /// Copy one surface's region to another
467 void CopySurface(const Surface& src_surface, const Surface& dst_surface,
468 SurfaceInterval copy_interval);
469
470 /// Load a texture from Switch memory to OpenGL and cache it (if not already cached)
471 Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
472 bool load_if_create);
473
474 /// Tries to find a framebuffer GPU address based on the provided CPU address
475 boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const;
476
477 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
478 /// Switch memory to OpenGL and caches it (if not already cached)
479 SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
480 bool load_if_create);
481
482 /// Get a surface based on the texture configuration 342 /// Get a surface based on the texture configuration
483 Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); 343 Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
484 344
@@ -486,29 +346,21 @@ public:
486 SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, 346 SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
487 const MathUtil::Rectangle<s32>& viewport); 347 const MathUtil::Rectangle<s32>& viewport);
488 348
489 /// Get a surface that matches the fill config 349 /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
490 Surface GetFillSurface(const void* config); 350 void MarkSurfaceAsDirty(const Surface& surface);
491 351
492 /// Get a surface that matches a "texture copy" display transfer config 352 /// Tries to find a framebuffer GPU address based on the provided CPU address
493 SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); 353 Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
494 354
495 /// Write any cached resources overlapping the region back to memory (if dirty) 355 /// Write any cached resources overlapping the region back to memory (if dirty)
496 void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr); 356 void FlushRegion(Tegra::GPUVAddr addr, size_t size);
497
498 /// Mark region as being invalidated by region_owner (nullptr if Switch memory)
499 void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner);
500 357
501 /// Flush all cached resources tracked by this cache manager 358 /// Mark the specified region as being invalidated
502 void FlushAll(); 359 void InvalidateRegion(Tegra::GPUVAddr addr, size_t size);
503 360
504private: 361private:
505 void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); 362 void LoadSurface(const Surface& surface);
506 363 Surface GetSurface(const SurfaceParams& params);
507 /// Update surface's texture for given region when necessary
508 void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size);
509
510 /// Create a new surface
511 Surface CreateSurface(const SurfaceParams& params);
512 364
513 /// Register surface into the cache 365 /// Register surface into the cache
514 void RegisterSurface(const Surface& surface); 366 void RegisterSurface(const Surface& surface);
@@ -519,18 +371,9 @@ private:
519 /// Increase/decrease the number of surface in pages touching the specified region 371 /// Increase/decrease the number of surface in pages touching the specified region
520 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); 372 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
521 373
522 SurfaceCache surface_cache; 374 std::unordered_map<SurfaceKey, Surface> surface_cache;
523 PageMap cached_pages; 375 PageMap cached_pages;
524 SurfaceMap dirty_regions;
525 SurfaceSet remove_surfaces;
526 376
527 OGLFramebuffer read_framebuffer; 377 OGLFramebuffer read_framebuffer;
528 OGLFramebuffer draw_framebuffer; 378 OGLFramebuffer draw_framebuffer;
529
530 OGLVertexArray attributeless_vao;
531 OGLBuffer d24s8_abgr_buffer;
532 GLsizeiptr d24s8_abgr_buffer_size;
533 OGLProgram d24s8_abgr_shader;
534 GLint d24s8_abgr_tbo_size_u_id;
535 GLint d24s8_abgr_viewport_u_id;
536}; 379};
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index f33766bfd..e3bb2cbb8 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -150,7 +150,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
150 screen_info)) { 150 screen_info)) {
151 // Reset the screen info's display texture to its own permanent texture 151 // Reset the screen info's display texture to its own permanent texture
152 screen_info.display_texture = screen_info.texture.resource.handle; 152 screen_info.display_texture = screen_info.texture.resource.handle;
153 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
154 153
155 Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, 154 Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
156 Memory::FlushMode::Flush); 155 Memory::FlushMode::Flush);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 2cc6d9a00..21f0d298c 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -27,7 +27,7 @@ struct TextureInfo {
27/// Structure used for storing information about the display target for the Switch screen 27/// Structure used for storing information about the display target for the Switch screen
28struct ScreenInfo { 28struct ScreenInfo {
29 GLuint display_texture; 29 GLuint display_texture;
30 MathUtil::Rectangle<float> display_texcoords; 30 const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
31 TextureInfo texture; 31 TextureInfo texture;
32}; 32};
33 33
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 8316db708..cd7986efa 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -84,6 +84,8 @@ void Config::ReadValues() {
84 qt_config->beginGroup("Renderer"); 84 qt_config->beginGroup("Renderer");
85 Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); 85 Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();
86 Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool(); 86 Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool();
87 Settings::values.use_accurate_framebuffers =
88 qt_config->value("use_accurate_framebuffers", false).toBool();
87 89
88 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); 90 Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
89 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); 91 Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
@@ -184,6 +186,7 @@ void Config::SaveValues() {
184 qt_config->beginGroup("Renderer"); 186 qt_config->beginGroup("Renderer");
185 qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor); 187 qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);
186 qt_config->setValue("toggle_framelimit", Settings::values.toggle_framelimit); 188 qt_config->setValue("toggle_framelimit", Settings::values.toggle_framelimit);
189 qt_config->setValue("use_accurate_framebuffers", Settings::values.use_accurate_framebuffers);
187 190
188 // Cast to double because Qt's written float values are not human-readable 191 // Cast to double because Qt's written float values are not human-readable
189 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 192 qt_config->setValue("bg_red", (double)Settings::values.bg_red);
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 47b9b6e95..7664880d5 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -59,11 +59,13 @@ void ConfigureGraphics::setConfiguration() {
59 ui->resolution_factor_combobox->setCurrentIndex( 59 ui->resolution_factor_combobox->setCurrentIndex(
60 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); 60 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
61 ui->toggle_framelimit->setChecked(Settings::values.toggle_framelimit); 61 ui->toggle_framelimit->setChecked(Settings::values.toggle_framelimit);
62 ui->use_accurate_framebuffers->setChecked(Settings::values.use_accurate_framebuffers);
62} 63}
63 64
64void ConfigureGraphics::applyConfiguration() { 65void ConfigureGraphics::applyConfiguration() {
65 Settings::values.resolution_factor = 66 Settings::values.resolution_factor =
66 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); 67 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
67 Settings::values.toggle_framelimit = ui->toggle_framelimit->isChecked(); 68 Settings::values.toggle_framelimit = ui->toggle_framelimit->isChecked();
69 Settings::values.use_accurate_framebuffers = ui->use_accurate_framebuffers->isChecked();
68 Settings::Apply(); 70 Settings::Apply();
69} 71}
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 366931a9a..7d092df03 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -30,6 +30,13 @@
30 </widget> 30 </widget>
31 </item> 31 </item>
32 <item> 32 <item>
33 <widget class="QCheckBox" name="use_accurate_framebuffers">
34 <property name="text">
35 <string>Use accurate framebuffers (slow)</string>
36 </property>
37 </widget>
38 </item>
39 <item>
33 <layout class="QHBoxLayout" name="horizontalLayout"> 40 <layout class="QHBoxLayout" name="horizontalLayout">
34 <item> 41 <item>
35 <widget class="QLabel" name="label"> 42 <widget class="QLabel" name="label">
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index ee6e4d658..150915c17 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -98,6 +98,8 @@ void Config::ReadValues() {
98 (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); 98 (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0);
99 Settings::values.toggle_framelimit = 99 Settings::values.toggle_framelimit =
100 sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true); 100 sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true);
101 Settings::values.use_accurate_framebuffers =
102 sdl2_config->GetBoolean("Renderer", "use_accurate_framebuffers", false);
101 103
102 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); 104 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0);
103 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); 105 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 1c438c3f5..5896971d4 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -102,6 +102,10 @@ resolution_factor =
102# 0 (default): Off, 1: On 102# 0 (default): Off, 1: On
103use_vsync = 103use_vsync =
104 104
105# Whether to use accurate framebuffers
106# 0 (default): Off (fast), 1 : On (slow)
107use_accurate_framebuffers =
108
105# The clear color for the renderer. What shows up on the sides of the bottom screen. 109# The clear color for the renderer. What shows up on the sides of the bottom screen.
106# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 110# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
107bg_red = 111bg_red =