author      2018-06-21 19:36:01 -0400
committer   2018-06-27 00:08:03 -0400
commit      5f57ab1b2aa80d427b6e454f8904e2e937a6981e (patch)
tree        25e6b89e1a0bf9e90ff317a2675fdb1d617ffdd8 /src
parent      Merge pull request #594 from bunnei/max-constbuff (diff)
gl_rasterizer_cache: Remove Citra's rasterizer cache, always load/flush surfaces.
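The change replaces the interval-tracked cache inherited from Citra with a much simpler model: every surface lookup reloads its data from guest memory, and write-back always flushes the whole surface. The following is a minimal illustrative sketch of that model only, not the actual yuzu classes (Surface, SimpleSurfaceCache, GetSurface and the guest_memory pointers are hypothetical stand-ins):

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <unordered_map>
#include <vector>

using GPUVAddr = std::uint64_t;

struct Surface {
    GPUVAddr addr;
    std::size_t size_in_bytes;
    std::vector<std::uint8_t> gl_buffer; // staging copy of the texture data
};

class SimpleSurfaceCache {
public:
    std::shared_ptr<Surface> GetSurface(GPUVAddr addr, std::size_t size,
                                        const std::uint8_t* guest_memory) {
        auto& surface = surfaces[addr];
        if (!surface) {
            surface = std::make_shared<Surface>(Surface{addr, size, {}});
        }
        // Always (re)load the full surface from guest memory -- no dirty-interval tracking.
        surface->gl_buffer.assign(guest_memory, guest_memory + size);
        return surface;
    }

    // Flushing writes the whole surface back to guest memory, with no sub-rect intervals.
    void FlushSurface(const std::shared_ptr<Surface>& surface, std::uint8_t* guest_memory) const {
        std::copy(surface->gl_buffer.begin(), surface->gl_buffer.end(), guest_memory);
    }

private:
    std::unordered_map<GPUVAddr, std::shared_ptr<Surface>> surfaces;
};
```

This trades extra copies for far less bookkeeping; the boost::icl interval machinery removed below is exactly the bookkeeping this model no longer needs.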
Diffstat (limited to 'src')
-rw-r--r--   src/video_core/renderer_opengl/gl_rasterizer.cpp         117
-rw-r--r--   src/video_core/renderer_opengl/gl_rasterizer.h             2
-rw-r--r--   src/video_core/renderer_opengl/gl_rasterizer_cache.cpp   1262
-rw-r--r--   src/video_core/renderer_opengl/gl_rasterizer_cache.h      323
4 files changed, 210 insertions, 1494 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 3fbf8e1f9..bc463fc30 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -146,7 +146,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
| 146 | u64 size = end - start + 1; | 146 | u64 size = end - start + 1; |
| 147 | 147 | ||
| 148 | // Copy vertex array data | 148 | // Copy vertex array data |
| 149 | res_cache.FlushRegion(start, size, nullptr); | ||
| 150 | Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); | 149 | Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); |
| 151 | 150 | ||
| 152 | // Bind the vertex array to the buffer at the current offset. | 151 | // Bind the vertex array to the buffer at the current offset. |
@@ -325,29 +324,22 @@ void RasterizerOpenGL::DrawArrays() {
| 325 | std::tie(color_surface, depth_surface, surfaces_rect) = | 324 | std::tie(color_surface, depth_surface, surfaces_rect) = |
| 326 | res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); | 325 | res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); |
| 327 | 326 | ||
| 328 | const u16 res_scale = color_surface != nullptr | ||
| 329 | ? color_surface->res_scale | ||
| 330 | : (depth_surface == nullptr ? 1u : depth_surface->res_scale); | ||
| 331 | |||
| 332 | MathUtil::Rectangle<u32> draw_rect{ | 327 | MathUtil::Rectangle<u32> draw_rect{ |
| 328 | static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, | ||
| 329 | surfaces_rect.left, surfaces_rect.right)), // Left | ||
| 330 | static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, | ||
| 331 | surfaces_rect.bottom, surfaces_rect.top)), // Top | ||
| 332 | static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right, | ||
| 333 | surfaces_rect.left, surfaces_rect.right)), // Right | ||
| 333 | static_cast<u32>( | 334 | static_cast<u32>( |
| 334 | std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale, | 335 | std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom, |
| 335 | surfaces_rect.left, surfaces_rect.right)), // Left | 336 | surfaces_rect.bottom, surfaces_rect.top))}; // Bottom |
| 336 | static_cast<u32>( | ||
| 337 | std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale, | ||
| 338 | surfaces_rect.bottom, surfaces_rect.top)), // Top | ||
| 339 | static_cast<u32>( | ||
| 340 | std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale, | ||
| 341 | surfaces_rect.left, surfaces_rect.right)), // Right | ||
| 342 | static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + | ||
| 343 | viewport_rect.bottom * res_scale, | ||
| 344 | surfaces_rect.bottom, surfaces_rect.top))}; // Bottom | ||
| 345 | 337 | ||
| 346 | // Bind the framebuffer surfaces | 338 | // Bind the framebuffer surfaces |
| 347 | BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); | 339 | BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); |
| 348 | 340 | ||
| 349 | // Sync the viewport | 341 | // Sync the viewport |
| 350 | SyncViewport(surfaces_rect, res_scale); | 342 | SyncViewport(surfaces_rect); |
| 351 | 343 | ||
| 352 | // Sync the blend state registers | 344 | // Sync the blend state registers |
| 353 | SyncBlendState(); | 345 | SyncBlendState(); |
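With res_scale gone, the draw rectangle is simply the guest viewport clamped into the framebuffer surface rectangle. A self-contained sketch of that clamping, using a minimal Rect type as a stand-in for MathUtil::Rectangle:

```cpp
#include <algorithm>
#include <cstdint>

// Minimal stand-in for MathUtil::Rectangle (bottom < top in GL window coordinates).
template <typename T>
struct Rect {
    T left, top, right, bottom;
};

// Mirrors the simplified draw_rect computation: the (possibly negative) viewport
// offsets are clamped into the framebuffer surface rectangle, with no res_scale.
inline Rect<std::uint32_t> ClampViewportToSurface(const Rect<std::uint32_t>& surface,
                                                  const Rect<std::int32_t>& viewport) {
    const auto clamp_x = [&](std::int32_t v) {
        return static_cast<std::uint32_t>(std::clamp<std::int32_t>(
            static_cast<std::int32_t>(surface.left) + v, surface.left, surface.right));
    };
    const auto clamp_y = [&](std::int32_t v) {
        return static_cast<std::uint32_t>(std::clamp<std::int32_t>(
            static_cast<std::int32_t>(surface.bottom) + v, surface.bottom, surface.top));
    };
    return {clamp_x(viewport.left), clamp_y(viewport.top),
            clamp_x(viewport.right), clamp_y(viewport.bottom)};
}
```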
@@ -442,44 +434,23 @@ void RasterizerOpenGL::DrawArrays() {
| 442 | state.Apply(); | 434 | state.Apply(); |
| 443 | 435 | ||
| 444 | // Mark framebuffer surfaces as dirty | 436 | // Mark framebuffer surfaces as dirty |
| 445 | MathUtil::Rectangle<u32> draw_rect_unscaled{ | ||
| 446 | draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, | ||
| 447 | draw_rect.bottom / res_scale}; | ||
| 448 | |||
| 449 | if (color_surface != nullptr && write_color_fb) { | 437 | if (color_surface != nullptr && write_color_fb) { |
| 450 | auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); | 438 | res_cache.FlushSurface(color_surface); |
| 451 | res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), | ||
| 452 | color_surface); | ||
| 453 | } | 439 | } |
| 454 | if (depth_surface != nullptr && write_depth_fb) { | 440 | if (depth_surface != nullptr && write_depth_fb) { |
| 455 | auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); | 441 | res_cache.FlushSurface(depth_surface); |
| 456 | res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), | ||
| 457 | depth_surface); | ||
| 458 | } | 442 | } |
| 459 | } | 443 | } |
| 460 | 444 | ||
| 461 | void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} | 445 | void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} |
| 462 | 446 | ||
| 463 | void RasterizerOpenGL::FlushAll() { | 447 | void RasterizerOpenGL::FlushAll() {} |
| 464 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 465 | res_cache.FlushAll(); | ||
| 466 | } | ||
| 467 | 448 | ||
| 468 | void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { | 449 | void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {} |
| 469 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 470 | res_cache.FlushRegion(addr, size); | ||
| 471 | } | ||
| 472 | 450 | ||
| 473 | void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { | 451 | void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} |
| 474 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 475 | res_cache.InvalidateRegion(addr, size, nullptr); | ||
| 476 | } | ||
| 477 | 452 | ||
| 478 | void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { | 453 | void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} |
| 479 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 480 | res_cache.FlushRegion(addr, size); | ||
| 481 | res_cache.InvalidateRegion(addr, size, nullptr); | ||
| 482 | } | ||
| 483 | 454 | ||
| 484 | bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { | 455 | bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { |
| 485 | MICROPROFILE_SCOPE(OpenGL_Blits); | 456 | MICROPROFILE_SCOPE(OpenGL_Blits); |
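After the draw, the written color/depth surfaces are now flushed in full, and the region-based hooks (FlushAll, FlushRegion, InvalidateRegion, FlushAndInvalidateRegion) become no-ops. A small generic sketch of the write-back step; FlushSurfaceFn stands in for RasterizerCacheOpenGL::FlushSurface:

```cpp
// Illustrative only: the post-draw write-back under the simplified cache model.
template <typename SurfacePtr, typename FlushSurfaceFn>
void WriteBackFramebuffer(const SurfacePtr& color_surface, bool write_color_fb,
                          const SurfacePtr& depth_surface, bool write_depth_fb,
                          FlushSurfaceFn&& FlushSurface) {
    if (color_surface != nullptr && write_color_fb) {
        FlushSurface(color_surface); // whole surface; no GetSubRectInterval/InvalidateRegion
    }
    if (depth_surface != nullptr && write_depth_fb) {
        FlushSurface(depth_surface);
    }
}
```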
@@ -500,44 +471,8 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) {
| 500 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, | 471 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, |
| 501 | VAddr framebuffer_addr, u32 pixel_stride, | 472 | VAddr framebuffer_addr, u32 pixel_stride, |
| 502 | ScreenInfo& screen_info) { | 473 | ScreenInfo& screen_info) { |
| 503 | if (framebuffer_addr == 0) { | 474 | // TODO(bunnei): ImplementMe |
| 504 | return false; | 475 | return false; |
| 505 | } | ||
| 506 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 507 | |||
| 508 | SurfaceParams src_params; | ||
| 509 | src_params.cpu_addr = framebuffer_addr; | ||
| 510 | src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0); | ||
| 511 | src_params.width = std::min(framebuffer.width, pixel_stride); | ||
| 512 | src_params.height = framebuffer.height; | ||
| 513 | src_params.stride = pixel_stride; | ||
| 514 | src_params.is_tiled = true; | ||
| 515 | src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; | ||
| 516 | src_params.pixel_format = | ||
| 517 | SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); | ||
| 518 | src_params.component_type = | ||
| 519 | SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format); | ||
| 520 | src_params.UpdateParams(); | ||
| 521 | |||
| 522 | MathUtil::Rectangle<u32> src_rect; | ||
| 523 | Surface src_surface; | ||
| 524 | std::tie(src_surface, src_rect) = | ||
| 525 | res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); | ||
| 526 | |||
| 527 | if (src_surface == nullptr) { | ||
| 528 | return false; | ||
| 529 | } | ||
| 530 | |||
| 531 | u32 scaled_width = src_surface->GetScaledWidth(); | ||
| 532 | u32 scaled_height = src_surface->GetScaledHeight(); | ||
| 533 | |||
| 534 | screen_info.display_texcoords = MathUtil::Rectangle<float>( | ||
| 535 | (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, | ||
| 536 | (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); | ||
| 537 | |||
| 538 | screen_info.display_texture = src_surface->texture.handle; | ||
| 539 | |||
| 540 | return true; | ||
| 541 | } | 476 | } |
| 542 | 477 | ||
| 543 | void RasterizerOpenGL::SamplerInfo::Create() { | 478 | void RasterizerOpenGL::SamplerInfo::Create() { |
@@ -674,7 +609,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
| 674 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); | 609 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); |
| 675 | Surface surface = res_cache.GetTextureSurface(texture); | 610 | Surface surface = res_cache.GetTextureSurface(texture); |
| 676 | if (surface != nullptr) { | 611 | if (surface != nullptr) { |
| 677 | state.texture_units[current_bindpoint].texture_2d = surface->texture.handle; | 612 | state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle; |
| 678 | state.texture_units[current_bindpoint].swizzle.r = | 613 | state.texture_units[current_bindpoint].swizzle.r = |
| 679 | MaxwellToGL::SwizzleSource(texture.tic.x_source); | 614 | MaxwellToGL::SwizzleSource(texture.tic.x_source); |
| 680 | state.texture_units[current_bindpoint].swizzle.g = | 615 | state.texture_units[current_bindpoint].swizzle.g = |
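Call sites now go through a Texture() accessor instead of reading the texture member directly. The accessor itself is not shown in this diff; a plausible minimal form, with simplified stand-ins for GLuint and OGLTexture, would be:

```cpp
#include <cstdint>

// Simplified stand-ins: the real code uses glad's GLuint and the OGLTexture RAII wrapper.
using GLuint = std::uint32_t;

struct OGLTexture {
    GLuint handle = 0;
};

class CachedSurface {
public:
    // Call sites such as SetupTextures and BindFramebufferSurfaces now read the
    // handle through this accessor instead of touching the member directly.
    const OGLTexture& Texture() const {
        return texture;
    }

private:
    OGLTexture texture;
};
```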
@@ -700,16 +635,16 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
| 700 | state.Apply(); | 635 | state.Apply(); |
| 701 | 636 | ||
| 702 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | 637 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, |
| 703 | color_surface != nullptr ? color_surface->texture.handle : 0, 0); | 638 | color_surface != nullptr ? color_surface->Texture().handle : 0, 0); |
| 704 | if (depth_surface != nullptr) { | 639 | if (depth_surface != nullptr) { |
| 705 | if (has_stencil) { | 640 | if (has_stencil) { |
| 706 | // attach both depth and stencil | 641 | // attach both depth and stencil |
| 707 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | 642 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, |
| 708 | depth_surface->texture.handle, 0); | 643 | depth_surface->Texture().handle, 0); |
| 709 | } else { | 644 | } else { |
| 710 | // attach depth | 645 | // attach depth |
| 711 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | 646 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, |
| 712 | depth_surface->texture.handle, 0); | 647 | depth_surface->Texture().handle, 0); |
| 713 | // clear stencil attachment | 648 | // clear stencil attachment |
| 714 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | 649 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); |
| 715 | } | 650 | } |
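For reference, the attachment logic above can be read as the following standalone sketch, assuming glad is loaded, a GL context is current, and the draw framebuffer has already been bound by the state tracker:

```cpp
#include <glad/glad.h>

void AttachSurfaces(GLuint color_handle, GLuint depth_handle, bool has_stencil) {
    // A handle of 0 detaches the color attachment when there is no color surface.
    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
                           color_handle, 0);
    if (depth_handle == 0) {
        return; // the real code clears the depth/stencil attachments in this case
    }
    if (has_stencil) {
        // Attach the same texture as a combined depth-stencil attachment.
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                               depth_handle, 0);
    } else {
        // Depth only; make sure no stale stencil attachment remains.
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
                               depth_handle, 0);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
    }
}
```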
@@ -720,14 +655,14 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
| 720 | } | 655 | } |
| 721 | } | 656 | } |
| 722 | 657 | ||
| 723 | void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) { | 658 | void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) { |
| 724 | const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; | 659 | const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; |
| 725 | const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; | 660 | const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; |
| 726 | 661 | ||
| 727 | state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale; | 662 | state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left; |
| 728 | state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; | 663 | state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom; |
| 729 | state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale); | 664 | state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth()); |
| 730 | state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale); | 665 | state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight()); |
| 731 | } | 666 | } |
| 732 | 667 | ||
| 733 | void RasterizerOpenGL::SyncClipEnabled() { | 668 | void RasterizerOpenGL::SyncClipEnabled() { |
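SyncViewport likewise drops the resolution-scale factor and maps the guest viewport directly into the surface rectangle. A rough standalone sketch of the arithmetic (Rect/Viewport here are simplified stand-ins, with width and height taken as right - left and top - bottom):

```cpp
#include <cstdint>

// Simplified stand-ins for MathUtil::Rectangle and the OpenGLState viewport fields.
struct RectS32 { std::int32_t left, top, right, bottom; };
struct RectU32 { std::uint32_t left, top, right, bottom; };
struct Viewport { std::int32_t x, y, width, height; };

// Mirrors the simplified SyncViewport: the guest viewport is offset by the surface
// origin with no res_scale multiplications.
inline Viewport ComputeViewport(const RectU32& surfaces_rect, const RectS32& viewport_rect) {
    Viewport vp{};
    vp.x = static_cast<std::int32_t>(surfaces_rect.left) + viewport_rect.left;
    vp.y = static_cast<std::int32_t>(surfaces_rect.bottom) + viewport_rect.bottom;
    vp.width = viewport_rect.right - viewport_rect.left;   // GetWidth()
    vp.height = viewport_rect.top - viewport_rect.bottom;  // GetHeight()
    return vp;
}
```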
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4762983c9..621200f03 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -109,7 +109,7 @@ private:
| 109 | u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries); | 109 | u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries); |
| 110 | 110 | ||
| 111 | /// Syncs the viewport to match the guest state | 111 | /// Syncs the viewport to match the guest state |
| 112 | void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); | 112 | void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect); |
| 113 | 113 | ||
| 114 | /// Syncs the clip enabled status to match the guest state | 114 | /// Syncs the clip enabled status to match the guest state |
| 115 | void SyncClipEnabled(); | 115 | void SyncClipEnabled(); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 857164ff6..5fb099d8d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -1,37 +1,22 @@
| 1 | // Copyright 2015 Citra Emulator Project | 1 | // Copyright 2018 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <atomic> | ||
| 7 | #include <cstring> | ||
| 8 | #include <iterator> | ||
| 9 | #include <memory> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | #include <boost/optional.hpp> | ||
| 13 | #include <boost/range/iterator_range.hpp> | ||
| 14 | #include <glad/glad.h> | 6 | #include <glad/glad.h> |
| 7 | |||
| 15 | #include "common/alignment.h" | 8 | #include "common/alignment.h" |
| 16 | #include "common/bit_field.h" | 9 | #include "common/assert.h" |
| 17 | #include "common/color.h" | ||
| 18 | #include "common/logging/log.h" | ||
| 19 | #include "common/math_util.h" | ||
| 20 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| 21 | #include "common/scope_exit.h" | 11 | #include "common/scope_exit.h" |
| 22 | #include "core/core.h" | 12 | #include "core/core.h" |
| 23 | #include "core/frontend/emu_window.h" | ||
| 24 | #include "core/hle/kernel/process.h" | 13 | #include "core/hle/kernel/process.h" |
| 25 | #include "core/hle/kernel/vm_manager.h" | ||
| 26 | #include "core/memory.h" | 14 | #include "core/memory.h" |
| 27 | #include "core/settings.h" | ||
| 28 | #include "video_core/engines/maxwell_3d.h" | 15 | #include "video_core/engines/maxwell_3d.h" |
| 29 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 16 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| 30 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 31 | #include "video_core/textures/astc.h" | 17 | #include "video_core/textures/astc.h" |
| 32 | #include "video_core/textures/decoders.h" | 18 | #include "video_core/textures/decoders.h" |
| 33 | #include "video_core/utils.h" | 19 | #include "video_core/utils.h" |
| 34 | #include "video_core/video_core.h" | ||
| 35 | 20 | ||
| 36 | using SurfaceType = SurfaceParams::SurfaceType; | 21 | using SurfaceType = SurfaceParams::SurfaceType; |
| 37 | using PixelFormat = SurfaceParams::PixelFormat; | 22 | using PixelFormat = SurfaceParams::PixelFormat; |
@@ -77,15 +62,18 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
| 77 | return {}; | 62 | return {}; |
| 78 | } | 63 | } |
| 79 | 64 | ||
| 80 | template <typename Map, typename Interval> | 65 | VAddr SurfaceParams::GetCpuAddr() const { |
| 81 | constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | 66 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 82 | return boost::make_iterator_range(map.equal_range(interval)); | 67 | return *gpu.memory_manager->GpuToCpuAddress(addr); |
| 83 | } | 68 | } |
| 84 | 69 | ||
| 85 | static u16 GetResolutionScaleFactor() { | 70 | static bool IsPixelFormatASTC(PixelFormat format) { |
| 86 | return static_cast<u16>(!Settings::values.resolution_factor | 71 | switch (format) { |
| 87 | ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() | 72 | case PixelFormat::ASTC_2D_4X4: |
| 88 | : Settings::values.resolution_factor); | 73 | return true; |
| 74 | default: | ||
| 75 | return false; | ||
| 76 | } | ||
| 89 | } | 77 | } |
| 90 | 78 | ||
| 91 | static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) { | 79 | static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) { |
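SurfaceParams::GetCpuAddr now always translates the surface's GPU address through the GPU memory manager; the old cpu_addr fallback is gone. A sketch of that translation pattern, with std::optional standing in for the boost::optional returned by the real memory manager and a toy mapping function for illustration:

```cpp
#include <cassert>
#include <cstdint>
#include <optional>

using GPUVAddr = std::uint64_t;
using VAddr = std::uint64_t;

// Hypothetical stand-in for Tegra::MemoryManager::GpuToCpuAddress, which returns
// an empty optional for unmapped GPU addresses. Toy identity mapping only.
std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) {
    if (gpu_addr == 0) {
        return std::nullopt;
    }
    return gpu_addr;
}

// Mirrors the new SurfaceParams::GetCpuAddr: translate and dereference directly.
VAddr GetCpuAddr(GPUVAddr surface_addr) {
    const auto cpu_addr = GpuToCpuAddress(surface_addr);
    assert(cpu_addr.has_value() && "surface GPU address must be mapped");
    return *cpu_addr;
}
```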
@@ -106,18 +94,17 @@ static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 wi
| 106 | } | 94 | } |
| 107 | 95 | ||
| 108 | template <bool morton_to_gl, PixelFormat format> | 96 | template <bool morton_to_gl, PixelFormat format> |
| 109 | void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, | 97 | void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) { |
| 110 | Tegra::GPUVAddr start, Tegra::GPUVAddr end) { | ||
| 111 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; | 98 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; |
| 112 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | 99 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); |
| 113 | const auto& gpu = Core::System::GetInstance().GPU(); | 100 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 114 | 101 | ||
| 115 | if (morton_to_gl) { | 102 | if (morton_to_gl) { |
| 116 | auto data = Tegra::Texture::UnswizzleTexture( | 103 | auto data = Tegra::Texture::UnswizzleTexture( |
| 117 | *gpu.memory_manager->GpuToCpuAddress(base), | 104 | *gpu.memory_manager->GpuToCpuAddress(addr), |
| 118 | SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); | 105 | SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); |
| 119 | 106 | ||
| 120 | if (SurfaceParams::IsFormatASTC(format)) { | 107 | if (IsPixelFormatASTC(format)) { |
| 121 | // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this | 108 | // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this |
| 122 | ConvertASTCToRGBA8(data, format, stride, height); | 109 | ConvertASTCToRGBA8(data, format, stride, height); |
| 123 | } | 110 | } |
@@ -129,13 +116,12 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::
| 129 | NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); | 116 | NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); |
| 130 | VideoCore::MortonCopyPixels128( | 117 | VideoCore::MortonCopyPixels128( |
| 131 | stride, height, bytes_per_pixel, gl_bytes_per_pixel, | 118 | stride, height, bytes_per_pixel, gl_bytes_per_pixel, |
| 132 | Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer, | 119 | Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer, |
| 133 | morton_to_gl); | 120 | morton_to_gl); |
| 134 | } | 121 | } |
| 135 | } | 122 | } |
| 136 | 123 | ||
| 137 | static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, | 124 | static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), |
| 138 | Tegra::GPUVAddr), | ||
| 139 | SurfaceParams::MaxPixelFormat> | 125 | SurfaceParams::MaxPixelFormat> |
| 140 | morton_to_gl_fns = { | 126 | morton_to_gl_fns = { |
| 141 | MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, | 127 | MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, |
@@ -146,8 +132,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
| 146 | MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, | 132 | MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, |
| 147 | }; | 133 | }; |
| 148 | 134 | ||
| 149 | static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, | 135 | static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), |
| 150 | Tegra::GPUVAddr), | ||
| 151 | SurfaceParams::MaxPixelFormat> | 136 | SurfaceParams::MaxPixelFormat> |
| 152 | gl_to_morton_fns = { | 137 | gl_to_morton_fns = { |
| 153 | MortonCopy<false, PixelFormat::ABGR8>, | 138 | MortonCopy<false, PixelFormat::ABGR8>, |
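Because MortonCopy no longer takes a (start, end) sub-range, each entry in morton_to_gl_fns/gl_to_morton_fns now (de)swizzles a whole surface and is selected purely by pixel-format index. A self-contained sketch of that dispatch pattern (the PixelFormat values and the copy stub are simplified stand-ins):

```cpp
#include <array>
#include <cstddef>
#include <cstdint>
#include <iostream>

using GPUVAddr = std::uint64_t;

// Simplified stand-ins; the real table covers SurfaceParams::MaxPixelFormat entries.
enum class PixelFormat : std::size_t { ABGR8 = 0, B5G6R5 = 1, MaxPixelFormat = 2 };

// Matches the shape of the new entries: (stride, block_height, height, gl_buffer, addr),
// with no (start, end) sub-range -- each call handles a whole surface.
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(std::uint32_t stride, std::uint32_t block_height, std::uint32_t height,
                std::uint8_t* gl_buffer, GPUVAddr addr) {
    static_cast<void>(block_height);
    static_cast<void>(gl_buffer);
    std::cout << (morton_to_gl ? "unswizzle" : "swizzle") << " whole surface at 0x" << std::hex
              << addr << std::dec << " (" << stride << 'x' << height << ")\n";
}

constexpr std::array<void (*)(std::uint32_t, std::uint32_t, std::uint32_t, std::uint8_t*, GPUVAddr),
                     static_cast<std::size_t>(PixelFormat::MaxPixelFormat)>
    morton_to_gl_fns{MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>};

int main() {
    std::uint8_t buffer[4]{};
    const PixelFormat format = PixelFormat::ABGR8;
    morton_to_gl_fns[static_cast<std::size_t>(format)](256, 16, 256, buffer, 0x1000);
}
```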
@@ -192,374 +177,76 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
| 192 | cur_state.Apply(); | 177 | cur_state.Apply(); |
| 193 | } | 178 | } |
| 194 | 179 | ||
| 195 | static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, | 180 | CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) { |
| 196 | const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, | 181 | texture.Create(); |
| 197 | GLuint read_fb_handle, GLuint draw_fb_handle) { | 182 | AllocateSurfaceTexture(texture.handle, |
| 198 | 183 | GetFormatTuple(params.pixel_format, params.component_type), params.width, | |
| 199 | glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex, | 184 | params.height); |
| 200 | GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(), | ||
| 201 | src_rect.GetHeight(), 0); | ||
| 202 | return true; | ||
| 203 | } | ||
| 204 | |||
| 205 | static bool FillSurface(const Surface& surface, const u8* fill_data, | ||
| 206 | const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) { | ||
| 207 | UNREACHABLE(); | ||
| 208 | return {}; | ||
| 209 | } | ||
| 210 | |||
| 211 | SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { | ||
| 212 | SurfaceParams params = *this; | ||
| 213 | const u32 tiled_size = is_tiled ? 8 : 1; | ||
| 214 | const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); | ||
| 215 | Tegra::GPUVAddr aligned_start = | ||
| 216 | addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); | ||
| 217 | Tegra::GPUVAddr aligned_end = | ||
| 218 | addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); | ||
| 219 | |||
| 220 | if (aligned_end - aligned_start > stride_tiled_bytes) { | ||
| 221 | params.addr = aligned_start; | ||
| 222 | params.height = static_cast<u32>((aligned_end - aligned_start) / BytesInPixels(stride)); | ||
| 223 | } else { | ||
| 224 | // 1 row | ||
| 225 | ASSERT(aligned_end - aligned_start == stride_tiled_bytes); | ||
| 226 | const u64 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1); | ||
| 227 | aligned_start = | ||
| 228 | addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); | ||
| 229 | aligned_end = | ||
| 230 | addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); | ||
| 231 | params.addr = aligned_start; | ||
| 232 | params.width = static_cast<u32>(PixelsInBytes(aligned_end - aligned_start) / tiled_size); | ||
| 233 | params.stride = params.width; | ||
| 234 | params.height = tiled_size; | ||
| 235 | } | ||
| 236 | params.UpdateParams(); | ||
| 237 | |||
| 238 | return params; | ||
| 239 | } | ||
| 240 | |||
| 241 | SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const { | ||
| 242 | if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { | ||
| 243 | return {}; | ||
| 244 | } | ||
| 245 | |||
| 246 | if (is_tiled) { | ||
| 247 | unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; | ||
| 248 | unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; | ||
| 249 | unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; | ||
| 250 | unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; | ||
| 251 | } | ||
| 252 | |||
| 253 | const u32 stride_tiled = !is_tiled ? stride : stride * 8; | ||
| 254 | |||
| 255 | const u32 pixel_offset = | ||
| 256 | stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + | ||
| 257 | unscaled_rect.left; | ||
| 258 | |||
| 259 | const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); | ||
| 260 | |||
| 261 | return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; | ||
| 262 | } | ||
| 263 | |||
| 264 | MathUtil::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { | ||
| 265 | const u32 begin_pixel_index = static_cast<u32>(PixelsInBytes(sub_surface.addr - addr)); | ||
| 266 | |||
| 267 | if (is_tiled) { | ||
| 268 | const int x0 = (begin_pixel_index % (stride * 8)) / 8; | ||
| 269 | const int y0 = (begin_pixel_index / (stride * 8)) * 8; | ||
| 270 | // Top to bottom | ||
| 271 | return MathUtil::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width, | ||
| 272 | height - (y0 + sub_surface.height)); | ||
| 273 | } | ||
| 274 | |||
| 275 | const int x0 = begin_pixel_index % stride; | ||
| 276 | const int y0 = begin_pixel_index / stride; | ||
| 277 | // Bottom to top | ||
| 278 | return MathUtil::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); | ||
| 279 | } | ||
| 280 | |||
| 281 | MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { | ||
| 282 | auto rect = GetSubRect(sub_surface); | ||
| 283 | rect.left = rect.left * res_scale; | ||
| 284 | rect.right = rect.right * res_scale; | ||
| 285 | rect.top = rect.top * res_scale; | ||
| 286 | rect.bottom = rect.bottom * res_scale; | ||
| 287 | return rect; | ||
| 288 | } | ||
| 289 | |||
| 290 | bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { | ||
| 291 | return std::tie(other_surface.addr, other_surface.width, other_surface.height, | ||
| 292 | other_surface.stride, other_surface.block_height, other_surface.pixel_format, | ||
| 293 | other_surface.component_type, | ||
| 294 | other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height, | ||
| 295 | pixel_format, component_type, is_tiled) && | ||
| 296 | pixel_format != PixelFormat::Invalid; | ||
| 297 | } | ||
| 298 | |||
| 299 | bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { | ||
| 300 | return sub_surface.addr >= addr && sub_surface.end <= end && | ||
| 301 | sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && | ||
| 302 | sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height && | ||
| 303 | sub_surface.component_type == component_type && | ||
| 304 | (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && | ||
| 305 | (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && | ||
| 306 | GetSubRect(sub_surface).left + sub_surface.width <= stride; | ||
| 307 | } | ||
| 308 | |||
| 309 | bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { | ||
| 310 | return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && | ||
| 311 | addr <= expanded_surface.end && expanded_surface.addr <= end && | ||
| 312 | is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height && | ||
| 313 | component_type == expanded_surface.component_type && stride == expanded_surface.stride && | ||
| 314 | (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % | ||
| 315 | BytesInPixels(stride * (is_tiled ? 8 : 1)) == | ||
| 316 | 0; | ||
| 317 | } | ||
| 318 | |||
| 319 | bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { | ||
| 320 | if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || | ||
| 321 | end < texcopy_params.end) { | ||
| 322 | return false; | ||
| 323 | } | ||
| 324 | if (texcopy_params.block_height != block_height || | ||
| 325 | texcopy_params.component_type != component_type) | ||
| 326 | return false; | ||
| 327 | |||
| 328 | if (texcopy_params.width != texcopy_params.stride) { | ||
| 329 | const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1))); | ||
| 330 | return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && | ||
| 331 | texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 && | ||
| 332 | (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && | ||
| 333 | ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; | ||
| 334 | } | ||
| 335 | return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); | ||
| 336 | } | ||
| 337 | |||
| 338 | VAddr SurfaceParams::GetCpuAddr() const { | ||
| 339 | // When this function is used, only cpu_addr or (GPU) addr should be set, not both | ||
| 340 | ASSERT(!(cpu_addr && addr)); | ||
| 341 | const auto& gpu = Core::System::GetInstance().GPU(); | ||
| 342 | return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr)); | ||
| 343 | } | ||
| 344 | |||
| 345 | bool CachedSurface::CanFill(const SurfaceParams& dest_surface, | ||
| 346 | SurfaceInterval fill_interval) const { | ||
| 347 | if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && | ||
| 348 | boost::icl::first(fill_interval) >= addr && | ||
| 349 | boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range | ||
| 350 | dest_surface.FromInterval(fill_interval).GetInterval() == | ||
| 351 | fill_interval) { // make sure interval is a rectangle in dest surface | ||
| 352 | if (fill_size * CHAR_BIT != dest_surface.GetFormatBpp()) { | ||
| 353 | // Check if bits repeat for our fill_size | ||
| 354 | const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / CHAR_BIT, 1u); | ||
| 355 | std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel); | ||
| 356 | |||
| 357 | for (u32 i = 0; i < dest_bytes_per_pixel; ++i) | ||
| 358 | std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); | ||
| 359 | |||
| 360 | for (u32 i = 0; i < fill_size; ++i) | ||
| 361 | if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], | ||
| 362 | dest_bytes_per_pixel) != 0) | ||
| 363 | return false; | ||
| 364 | |||
| 365 | if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) | ||
| 366 | return false; | ||
| 367 | } | ||
| 368 | return true; | ||
| 369 | } | ||
| 370 | return false; | ||
| 371 | } | ||
| 372 | |||
| 373 | bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, | ||
| 374 | SurfaceInterval copy_interval) const { | ||
| 375 | SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); | ||
| 376 | ASSERT(subrect_params.GetInterval() == copy_interval); | ||
| 377 | if (CanSubRect(subrect_params)) | ||
| 378 | return true; | ||
| 379 | |||
| 380 | if (CanFill(dest_surface, copy_interval)) | ||
| 381 | return true; | ||
| 382 | |||
| 383 | return false; | ||
| 384 | } | ||
| 385 | |||
| 386 | SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { | ||
| 387 | SurfaceInterval result{}; | ||
| 388 | const auto valid_regions = | ||
| 389 | SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; | ||
| 390 | for (auto& valid_interval : valid_regions) { | ||
| 391 | const SurfaceInterval aligned_interval{ | ||
| 392 | addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, | ||
| 393 | BytesInPixels(is_tiled ? 8 * 8 : 1)), | ||
| 394 | addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, | ||
| 395 | BytesInPixels(is_tiled ? 8 * 8 : 1))}; | ||
| 396 | |||
| 397 | if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || | ||
| 398 | boost::icl::length(aligned_interval) == 0) { | ||
| 399 | continue; | ||
| 400 | } | ||
| 401 | |||
| 402 | // Get the rectangle within aligned_interval | ||
| 403 | const u32 stride_bytes = static_cast<u32>(BytesInPixels(stride)) * (is_tiled ? 8 : 1); | ||
| 404 | SurfaceInterval rect_interval{ | ||
| 405 | addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), | ||
| 406 | addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), | ||
| 407 | }; | ||
| 408 | if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { | ||
| 409 | // 1 row | ||
| 410 | rect_interval = aligned_interval; | ||
| 411 | } else if (boost::icl::length(rect_interval) == 0) { | ||
| 412 | // 2 rows that do not make a rectangle, return the larger one | ||
| 413 | const SurfaceInterval row1{boost::icl::first(aligned_interval), | ||
| 414 | boost::icl::first(rect_interval)}; | ||
| 415 | const SurfaceInterval row2{boost::icl::first(rect_interval), | ||
| 416 | boost::icl::last_next(aligned_interval)}; | ||
| 417 | rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2; | ||
| 418 | } | ||
| 419 | |||
| 420 | if (boost::icl::length(rect_interval) > boost::icl::length(result)) { | ||
| 421 | result = rect_interval; | ||
| 422 | } | ||
| 423 | } | ||
| 424 | return result; | ||
| 425 | } | ||
| 426 | |||
| 427 | void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface, | ||
| 428 | SurfaceInterval copy_interval) { | ||
| 429 | SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); | ||
| 430 | ASSERT(subrect_params.GetInterval() == copy_interval); | ||
| 431 | |||
| 432 | ASSERT(src_surface != dst_surface); | ||
| 433 | |||
| 434 | // This is only called when CanCopy is true, no need to run checks here | ||
| 435 | if (src_surface->type == SurfaceType::Fill) { | ||
| 436 | // FillSurface needs a 4 bytes buffer | ||
| 437 | const u64 fill_offset = | ||
| 438 | (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; | ||
| 439 | std::array<u8, 4> fill_buffer; | ||
| 440 | |||
| 441 | u64 fill_buff_pos = fill_offset; | ||
| 442 | for (int i : {0, 1, 2, 3}) | ||
| 443 | fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; | ||
| 444 | |||
| 445 | FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params), | ||
| 446 | draw_framebuffer.handle); | ||
| 447 | return; | ||
| 448 | } | ||
| 449 | if (src_surface->CanSubRect(subrect_params)) { | ||
| 450 | BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params), | ||
| 451 | dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params), | ||
| 452 | src_surface->type, read_framebuffer.handle, draw_framebuffer.handle); | ||
| 453 | return; | ||
| 454 | } | ||
| 455 | UNREACHABLE(); | ||
| 456 | } | 185 | } |
| 457 | 186 | ||
| 458 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); | 187 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); |
| 459 | void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) { | 188 | void CachedSurface::LoadGLBuffer() { |
| 460 | ASSERT(type != SurfaceType::Fill); | 189 | ASSERT(params.type != SurfaceType::Fill); |
| 461 | 190 | ||
| 462 | u8* const texture_src_data = Memory::GetPointer(GetCpuAddr()); | 191 | u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); |
| 463 | if (texture_src_data == nullptr) | ||
| 464 | return; | ||
| 465 | 192 | ||
| 466 | if (gl_buffer == nullptr) { | 193 | ASSERT(texture_src_data); |
| 467 | gl_buffer_size = GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format); | 194 | |
| 195 | if (!gl_buffer) { | ||
| 196 | gl_buffer_size = params.width * params.height * GetGLBytesPerPixel(params.pixel_format); | ||
| 468 | gl_buffer.reset(new u8[gl_buffer_size]); | 197 | gl_buffer.reset(new u8[gl_buffer_size]); |
| 469 | } | 198 | } |
| 470 | 199 | ||
| 471 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); | 200 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); |
| 472 | 201 | ||
| 473 | ASSERT(load_start >= addr && load_end <= end); | 202 | if (!params.is_tiled) { |
| 474 | const u64 start_offset = load_start - addr; | 203 | const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; |
| 475 | |||
| 476 | if (!is_tiled) { | ||
| 477 | const u32 bytes_per_pixel{GetFormatBpp() >> 3}; | ||
| 478 | 204 | ||
| 479 | std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, | 205 | std::memcpy(&gl_buffer[0], texture_src_data, |
| 480 | bytes_per_pixel * width * height); | 206 | bytes_per_pixel * params.width * params.height); |
| 481 | } else { | 207 | } else { |
| 482 | morton_to_gl_fns[static_cast<size_t>(pixel_format)](GetActualWidth(), block_height, | 208 | morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( |
| 483 | GetActualHeight(), &gl_buffer[0], addr, | 209 | params.width, params.block_height, params.height, &gl_buffer[0], params.addr); |
| 484 | load_start, load_end); | ||
| 485 | } | 210 | } |
| 486 | } | 211 | } |
| 487 | 212 | ||
| 488 | MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); | 213 | MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); |
| 489 | void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) { | 214 | void CachedSurface::FlushGLBuffer() { |
| 490 | u8* const dst_buffer = Memory::GetPointer(GetCpuAddr()); | 215 | u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr()); |
| 491 | if (dst_buffer == nullptr) | ||
| 492 | return; | ||
| 493 | |||
| 494 | ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); | ||
| 495 | |||
| 496 | // TODO: Should probably be done in ::Memory:: and check for other regions too | ||
| 497 | // same as loadglbuffer() | ||
| 498 | if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) | ||
| 499 | flush_end = Memory::VRAM_VADDR_END; | ||
| 500 | 216 | ||
| 501 | if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) | 217 | ASSERT(dst_buffer); |
| 502 | flush_start = Memory::VRAM_VADDR; | 218 | ASSERT(gl_buffer_size == |
| 219 | params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); | ||
| 503 | 220 | ||
| 504 | MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); | 221 | MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); |
| 505 | 222 | ||
| 506 | ASSERT(flush_start >= addr && flush_end <= end); | 223 | if (!params.is_tiled) { |
| 507 | const u64 start_offset = flush_start - addr; | 224 | std::memcpy(dst_buffer, &gl_buffer[0], params.SizeInBytes()); |
| 508 | const u64 end_offset = flush_end - addr; | ||
| 509 | |||
| 510 | if (type == SurfaceType::Fill) { | ||
| 511 | const u64 coarse_start_offset = start_offset - (start_offset % fill_size); | ||
| 512 | const u64 backup_bytes = start_offset % fill_size; | ||
| 513 | std::array<u8, 4> backup_data; | ||
| 514 | if (backup_bytes) | ||
| 515 | std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); | ||
| 516 | |||
| 517 | for (u64 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { | ||
| 518 | std::memcpy(&dst_buffer[offset], &fill_data[0], | ||
| 519 | std::min(fill_size, end_offset - offset)); | ||
| 520 | } | ||
| 521 | |||
| 522 | if (backup_bytes) | ||
| 523 | std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); | ||
| 524 | } else if (!is_tiled) { | ||
| 525 | std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); | ||
| 526 | } else { | 225 | } else { |
| 527 | gl_to_morton_fns[static_cast<size_t>(pixel_format)]( | 226 | gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( |
| 528 | stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end); | 227 | params.width, params.block_height, params.height, &gl_buffer[0], params.addr); |
| 529 | } | 228 | } |
| 530 | } | 229 | } |
| 531 | 230 | ||
| 532 | MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); | 231 | MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); |
| 533 | void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, | 232 | void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { |
| 534 | GLuint draw_fb_handle) { | 233 | if (params.type == SurfaceType::Fill) |
| 535 | if (type == SurfaceType::Fill) | ||
| 536 | return; | 234 | return; |
| 537 | 235 | ||
| 538 | MICROPROFILE_SCOPE(OpenGL_TextureUL); | 236 | MICROPROFILE_SCOPE(OpenGL_TextureUL); |
| 539 | 237 | ||
| 540 | ASSERT(gl_buffer_size == | 238 | ASSERT(gl_buffer_size == |
| 541 | GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format)); | 239 | params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); |
| 240 | |||
| 241 | const auto& rect{params.GetRect()}; | ||
| 542 | 242 | ||
| 543 | // Load data from memory to the surface | 243 | // Load data from memory to the surface |
| 544 | GLint x0 = static_cast<GLint>(rect.left); | 244 | GLint x0 = static_cast<GLint>(rect.left); |
| 545 | GLint y0 = static_cast<GLint>(rect.bottom); | 245 | GLint y0 = static_cast<GLint>(rect.bottom); |
| 546 | size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); | 246 | size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format); |
| 547 | 247 | ||
| 548 | const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); | 248 | const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); |
| 549 | GLuint target_tex = texture.handle; | 249 | GLuint target_tex = texture.handle; |
| 550 | |||
| 551 | // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in | ||
| 552 | // surface | ||
| 553 | OGLTexture unscaled_tex; | ||
| 554 | if (res_scale != 1) { | ||
| 555 | x0 = 0; | ||
| 556 | y0 = 0; | ||
| 557 | |||
| 558 | unscaled_tex.Create(); | ||
| 559 | AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); | ||
| 560 | target_tex = unscaled_tex.handle; | ||
| 561 | } | ||
| 562 | |||
| 563 | OpenGLState cur_state = OpenGLState::GetCurState(); | 250 | OpenGLState cur_state = OpenGLState::GetCurState(); |
| 564 | 251 | ||
| 565 | GLuint old_tex = cur_state.texture_units[0].texture_2d; | 252 | GLuint old_tex = cur_state.texture_units[0].texture_2d; |
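LoadGLBuffer and FlushGLBuffer no longer take an address range: the staging buffer always covers width x height x GL bytes-per-pixel, and the linear (non-tiled) path is a single memcpy in each direction. A simplified, self-contained sketch of that sizing and copy (Params and StagingBuffer are stand-ins for SurfaceParams and the gl_buffer members):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>

struct Params {
    std::uint32_t width;
    std::uint32_t height;
    std::uint32_t gl_bytes_per_pixel;
    bool is_tiled;
};

struct StagingBuffer {
    std::unique_ptr<std::uint8_t[]> data;
    std::size_t size = 0;
};

// Load: always copy the full surface from guest memory into the staging buffer.
void LoadGLBuffer(StagingBuffer& staging, const Params& params, const std::uint8_t* guest_src) {
    const std::size_t required =
        std::size_t{params.width} * params.height * params.gl_bytes_per_pixel;
    if (!staging.data) {
        staging.size = required;
        staging.data = std::make_unique<std::uint8_t[]>(required);
    }
    if (!params.is_tiled) {
        std::memcpy(staging.data.get(), guest_src, required); // one linear copy, no sub-range
    }
    // Tiled surfaces go through the morton_to_gl function table instead.
}

// Flush: the mirror image -- write the whole staging buffer back to guest memory.
void FlushGLBuffer(const StagingBuffer& staging, const Params& params, std::uint8_t* guest_dst) {
    if (!params.is_tiled) {
        std::memcpy(guest_dst, staging.data.get(), staging.size);
    }
}
```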
@@ -567,15 +254,15 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
| 567 | cur_state.Apply(); | 254 | cur_state.Apply(); |
| 568 | 255 | ||
| 569 | // Ensure no bad interactions with GL_UNPACK_ALIGNMENT | 256 | // Ensure no bad interactions with GL_UNPACK_ALIGNMENT |
| 570 | ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); | 257 | ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); |
| 571 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride)); | 258 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width)); |
| 572 | 259 | ||
| 573 | glActiveTexture(GL_TEXTURE0); | 260 | glActiveTexture(GL_TEXTURE0); |
| 574 | if (tuple.compressed) { | 261 | if (tuple.compressed) { |
| 575 | glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, | 262 | glCompressedTexImage2D( |
| 576 | static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()), | 263 | GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width), |
| 577 | static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0, | 264 | static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.SizeInBytes()), |
| 578 | static_cast<GLsizei>(size), &gl_buffer[buffer_offset]); | 265 | &gl_buffer[buffer_offset]); |
| 579 | } else { | 266 | } else { |
| 580 | glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), | 267 | glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), |
| 581 | static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, | 268 | static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, |
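The upload now always covers the full surface rectangle, so GL_UNPACK_ROW_LENGTH is set to the surface width and one call uploads the whole staging buffer. A sketch of the uncompressed path, assuming glad is loaded and the destination texture is bound to GL_TEXTURE_2D on unit 0:

```cpp
#include <glad/glad.h>

#include <cstdint>

void UploadWholeSurface(GLenum format, GLenum type, std::uint32_t width, std::uint32_t height,
                        const std::uint8_t* gl_buffer) {
    // Row length must match the staging buffer's stride, which is the surface width here.
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(width));
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, static_cast<GLsizei>(width),
                    static_cast<GLsizei>(height), format, type, gl_buffer);
    // Restore the default so later uploads with a different stride are unaffected.
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
```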
@@ -586,29 +273,17 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
| 586 | 273 | ||
| 587 | cur_state.texture_units[0].texture_2d = old_tex; | 274 | cur_state.texture_units[0].texture_2d = old_tex; |
| 588 | cur_state.Apply(); | 275 | cur_state.Apply(); |
| 589 | |||
| 590 | if (res_scale != 1) { | ||
| 591 | auto scaled_rect = rect; | ||
| 592 | scaled_rect.left *= res_scale; | ||
| 593 | scaled_rect.top *= res_scale; | ||
| 594 | scaled_rect.right *= res_scale; | ||
| 595 | scaled_rect.bottom *= res_scale; | ||
| 596 | |||
| 597 | BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, | ||
| 598 | scaled_rect, type, read_fb_handle, draw_fb_handle); | ||
| 599 | } | ||
| 600 | } | 276 | } |
| 601 | 277 | ||
| 602 | MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); | 278 | MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); |
| 603 | void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, | 279 | void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { |
| 604 | GLuint draw_fb_handle) { | 280 | if (params.type == SurfaceType::Fill) |
| 605 | if (type == SurfaceType::Fill) | ||
| 606 | return; | 281 | return; |
| 607 | 282 | ||
| 608 | MICROPROFILE_SCOPE(OpenGL_TextureDL); | 283 | MICROPROFILE_SCOPE(OpenGL_TextureDL); |
| 609 | 284 | ||
| 610 | if (gl_buffer == nullptr) { | 285 | if (!gl_buffer) { |
| 611 | gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); | 286 | gl_buffer_size = params.width * params.height * GetGLBytesPerPixel(params.pixel_format); |
| 612 | gl_buffer.reset(new u8[gl_buffer_size]); | 287 | gl_buffer.reset(new u8[gl_buffer_size]); |
| 613 | } | 288 | } |
| 614 | 289 | ||
@@ -616,437 +291,45 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui
| 616 | OpenGLState prev_state = state; | 291 | OpenGLState prev_state = state; |
| 617 | SCOPE_EXIT({ prev_state.Apply(); }); | 292 | SCOPE_EXIT({ prev_state.Apply(); }); |
| 618 | 293 | ||
| 619 | const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); | 294 | const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); |
| 620 | 295 | ||
| 621 | // Ensure no bad interactions with GL_PACK_ALIGNMENT | 296 | // Ensure no bad interactions with GL_PACK_ALIGNMENT |
| 622 | ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); | 297 | ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); |
| 623 | glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride)); | 298 | glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); |
| 624 | size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); | ||
| 625 | |||
| 626 | // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush | ||
| 627 | if (res_scale != 1) { | ||
| 628 | auto scaled_rect = rect; | ||
| 629 | scaled_rect.left *= res_scale; | ||
| 630 | scaled_rect.top *= res_scale; | ||
| 631 | scaled_rect.right *= res_scale; | ||
| 632 | scaled_rect.bottom *= res_scale; | ||
| 633 | |||
| 634 | OGLTexture unscaled_tex; | ||
| 635 | unscaled_tex.Create(); | ||
| 636 | |||
| 637 | MathUtil::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; | ||
| 638 | AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); | ||
| 639 | BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type, | ||
| 640 | read_fb_handle, draw_fb_handle); | ||
| 641 | |||
| 642 | state.texture_units[0].texture_2d = unscaled_tex.handle; | ||
| 643 | state.Apply(); | ||
| 644 | |||
| 645 | glActiveTexture(GL_TEXTURE0); | ||
| 646 | glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); | ||
| 647 | } else { | ||
| 648 | state.UnbindTexture(texture.handle); | ||
| 649 | state.draw.read_framebuffer = read_fb_handle; | ||
| 650 | state.Apply(); | ||
| 651 | |||
| 652 | if (type == SurfaceType::ColorTexture) { | ||
| 653 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | ||
| 654 | texture.handle, 0); | ||
| 655 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 656 | 0, 0); | ||
| 657 | } else if (type == SurfaceType::Depth) { | ||
| 658 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 659 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||
| 660 | texture.handle, 0); | ||
| 661 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 662 | } else { | ||
| 663 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 664 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 665 | texture.handle, 0); | ||
| 666 | } | ||
| 667 | glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), | ||
| 668 | static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), | ||
| 669 | tuple.format, tuple.type, &gl_buffer[buffer_offset]); | ||
| 670 | } | ||
| 671 | 299 | ||
| 672 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | 300 | const auto& rect{params.GetRect()}; |
| 673 | } | 301 | size_t buffer_offset = |
| 302 | (rect.bottom * params.width + rect.left) * GetGLBytesPerPixel(params.pixel_format); | ||
| 674 | 303 | ||
| 675 | enum class MatchFlags { | 304 | state.UnbindTexture(texture.handle); |
| 676 | None = 0, | 305 | state.draw.read_framebuffer = read_fb_handle; |
| 677 | Invalid = 1, // Flag that can be applied to other match types, invalid matches require | 306 | state.Apply(); |
| 678 | // validation before they can be used | ||
| 679 | Exact = 1 << 1, // Surfaces perfectly match | ||
| 680 | SubRect = 1 << 2, // Surface encompasses params | ||
| 681 | Copy = 1 << 3, // Surface we can copy from | ||
| 682 | Expand = 1 << 4, // Surface that can expand params | ||
| 683 | TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters | ||
| 684 | }; | ||
| 685 | |||
| 686 | constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { | ||
| 687 | return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs)); | ||
| 688 | } | ||
| 689 | |||
| 690 | constexpr MatchFlags operator&(MatchFlags lhs, MatchFlags rhs) { | ||
| 691 | return static_cast<MatchFlags>(static_cast<int>(lhs) & static_cast<int>(rhs)); | ||
| 692 | } | ||
| 693 | 307 | ||
| 694 | /// Get the best surface match (and its match type) for the given flags | 308 | if (params.type == SurfaceType::ColorTexture) { |
| 695 | template <MatchFlags find_flags> | 309 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, |
| 696 | Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, | 310 | texture.handle, 0); |
| 697 | ScaleMatch match_scale_type, | 311 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, |
| 698 | boost::optional<SurfaceInterval> validate_interval = boost::none) { | 312 | 0); |
| 699 | Surface match_surface = nullptr; | 313 | } else if (params.type == SurfaceType::Depth) { |
| 700 | bool match_valid = false; | 314 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); |
| 701 | u32 match_scale = 0; | 315 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, |
| 702 | SurfaceInterval match_interval{}; | 316 | texture.handle, 0); |
| 703 | 317 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | |
| 704 | for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { | 318 | } else { |
| 705 | for (auto& surface : pair.second) { | 319 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); |
| 706 | bool res_scale_matched = match_scale_type == ScaleMatch::Exact | 320 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, |
| 707 | ? (params.res_scale == surface->res_scale) | 321 | texture.handle, 0); |
| 708 | : (params.res_scale <= surface->res_scale); | ||
| 709 | // validity will be checked in GetCopyableInterval | ||
| 710 | bool is_valid = | ||
| 711 | (find_flags & MatchFlags::Copy) != MatchFlags::None | ||
| 712 | ? true | ||
| 713 | : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); | ||
| 714 | |||
| 715 | if ((find_flags & MatchFlags::Invalid) == MatchFlags::None && !is_valid) | ||
| 716 | continue; | ||
| 717 | |||
| 718 | auto IsMatch_Helper = [&](auto check_type, auto match_fn) { | ||
| 719 | if ((find_flags & check_type) == MatchFlags::None) | ||
| 720 | return; | ||
| 721 | |||
| 722 | bool matched; | ||
| 723 | SurfaceInterval surface_interval; | ||
| 724 | std::tie(matched, surface_interval) = match_fn(); | ||
| 725 | if (!matched) | ||
| 726 | return; | ||
| 727 | |||
| 728 | if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && | ||
| 729 | surface->type != SurfaceType::Fill) | ||
| 730 | return; | ||
| 731 | |||
| 732 | // Found a match, update only if this is better than the previous one | ||
| 733 | auto UpdateMatch = [&] { | ||
| 734 | match_surface = surface; | ||
| 735 | match_valid = is_valid; | ||
| 736 | match_scale = surface->res_scale; | ||
| 737 | match_interval = surface_interval; | ||
| 738 | }; | ||
| 739 | |||
| 740 | if (surface->res_scale > match_scale) { | ||
| 741 | UpdateMatch(); | ||
| 742 | return; | ||
| 743 | } else if (surface->res_scale < match_scale) { | ||
| 744 | return; | ||
| 745 | } | ||
| 746 | |||
| 747 | if (is_valid && !match_valid) { | ||
| 748 | UpdateMatch(); | ||
| 749 | return; | ||
| 750 | } else if (is_valid != match_valid) { | ||
| 751 | return; | ||
| 752 | } | ||
| 753 | |||
| 754 | if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { | ||
| 755 | UpdateMatch(); | ||
| 756 | } | ||
| 757 | }; | ||
| 758 | IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] { | ||
| 759 | return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); | ||
| 760 | }); | ||
| 761 | IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] { | ||
| 762 | return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); | ||
| 763 | }); | ||
| 764 | IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] { | ||
| 765 | auto copy_interval = | ||
| 766 | params.FromInterval(*validate_interval).GetCopyableInterval(surface); | ||
| 767 | bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && | ||
| 768 | surface->CanCopy(params, copy_interval); | ||
| 769 | return std::make_pair(matched, copy_interval); | ||
| 770 | }); | ||
| 771 | IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] { | ||
| 772 | return std::make_pair(surface->CanExpand(params), surface->GetInterval()); | ||
| 773 | }); | ||
| 774 | IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] { | ||
| 775 | return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); | ||
| 776 | }); | ||
| 777 | } | ||
| 778 | } | 322 | } |
| 779 | return match_surface; | 323 | glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), |
| 324 | static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), | ||
| 325 | tuple.format, tuple.type, &gl_buffer[buffer_offset]); | ||
| 326 | |||
| 327 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | ||
| 780 | } | 328 | } |
| 781 | 329 | ||
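The FindMatch helper removed above walks every cached surface overlapping the requested interval and keeps the best candidate, preferring a higher res_scale, then valid data, then the longest covered interval. A minimal standalone sketch of that ranking order follows; CandidateSurface and FindBestMatch are simplified stand-ins for illustration, not yuzu's actual Surface/SurfaceParams types.

```cpp
// Sketch of the removed best-match ranking: prefer higher res_scale, then a
// valid region, then the longest overlap. All types here are simplified
// stand-ins, not the emulator's real surface classes.
#include <cstddef>
#include <cstdint>
#include <optional>
#include <tuple>
#include <vector>

struct CandidateSurface {
    std::uint16_t res_scale;   // resolution scale of the cached copy
    bool is_valid;             // whether the requested region is up to date
    std::uint64_t covered_len; // bytes of the requested interval it covers
};

// Returns the index of the best candidate, mirroring the UpdateMatch ordering.
std::optional<std::size_t> FindBestMatch(const std::vector<CandidateSurface>& candidates) {
    std::optional<std::size_t> best;
    for (std::size_t i = 0; i < candidates.size(); ++i) {
        const auto rank = [](const CandidateSurface& s) {
            // Lexicographic comparison encodes the priority order used above.
            return std::make_tuple(s.res_scale, s.is_valid, s.covered_len);
        };
        if (!best || rank(candidates[i]) > rank(candidates[*best])) {
            best = i;
        }
    }
    return best;
}
```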
| 782 | RasterizerCacheOpenGL::RasterizerCacheOpenGL() { | 330 | RasterizerCacheOpenGL::RasterizerCacheOpenGL() { |
| 783 | read_framebuffer.Create(); | 331 | read_framebuffer.Create(); |
| 784 | draw_framebuffer.Create(); | 332 | draw_framebuffer.Create(); |
| 785 | |||
| 786 | attributeless_vao.Create(); | ||
| 787 | |||
| 788 | d24s8_abgr_buffer.Create(); | ||
| 789 | d24s8_abgr_buffer_size = 0; | ||
| 790 | |||
| 791 | const char* vs_source = R"( | ||
| 792 | #version 330 core | ||
| 793 | const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); | ||
| 794 | void main() { | ||
| 795 | gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); | ||
| 796 | } | ||
| 797 | )"; | ||
| 798 | const char* fs_source = R"( | ||
| 799 | #version 330 core | ||
| 800 | |||
| 801 | uniform samplerBuffer tbo; | ||
| 802 | uniform vec2 tbo_size; | ||
| 803 | uniform vec4 viewport; | ||
| 804 | |||
| 805 | out vec4 color; | ||
| 806 | |||
| 807 | void main() { | ||
| 808 | vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw; | ||
| 809 | int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x); | ||
| 810 | color = texelFetch(tbo, tbo_offset).rabg; | ||
| 811 | } | ||
| 812 | )"; | ||
| 813 | d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); | ||
| 814 | |||
| 815 | OpenGLState state = OpenGLState::GetCurState(); | ||
| 816 | GLuint old_program = state.draw.shader_program; | ||
| 817 | state.draw.shader_program = d24s8_abgr_shader.handle; | ||
| 818 | state.Apply(); | ||
| 819 | |||
| 820 | GLint tbo_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo"); | ||
| 821 | ASSERT(tbo_u_id != -1); | ||
| 822 | glUniform1i(tbo_u_id, 0); | ||
| 823 | |||
| 824 | state.draw.shader_program = old_program; | ||
| 825 | state.Apply(); | ||
| 826 | |||
| 827 | d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size"); | ||
| 828 | ASSERT(d24s8_abgr_tbo_size_u_id != -1); | ||
| 829 | d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport"); | ||
| 830 | ASSERT(d24s8_abgr_viewport_u_id != -1); | ||
| 831 | } | ||
| 832 | |||
| 833 | RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { | ||
| 834 | FlushAll(); | ||
| 835 | while (!surface_cache.empty()) | ||
| 836 | UnregisterSurface(*surface_cache.begin()->second.begin()); | ||
| 837 | } | ||
| 838 | |||
| 839 | bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, | ||
| 840 | const MathUtil::Rectangle<u32>& src_rect, | ||
| 841 | const Surface& dst_surface, | ||
| 842 | const MathUtil::Rectangle<u32>& dst_rect) { | ||
| 843 | if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) | ||
| 844 | return false; | ||
| 845 | |||
| 846 | return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, | ||
| 847 | dst_rect, src_surface->type, read_framebuffer.handle, | ||
| 848 | draw_framebuffer.handle); | ||
| 849 | } | ||
| 850 | |||
| 851 | void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex, | ||
| 852 | const MathUtil::Rectangle<u32>& src_rect, | ||
| 853 | GLuint dst_tex, | ||
| 854 | const MathUtil::Rectangle<u32>& dst_rect) { | ||
| 855 | OpenGLState prev_state = OpenGLState::GetCurState(); | ||
| 856 | SCOPE_EXIT({ prev_state.Apply(); }); | ||
| 857 | |||
| 858 | OpenGLState state; | ||
| 859 | state.draw.read_framebuffer = read_framebuffer.handle; | ||
| 860 | state.draw.draw_framebuffer = draw_framebuffer.handle; | ||
| 861 | state.Apply(); | ||
| 862 | |||
| 863 | glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle); | ||
| 864 | |||
| 865 | GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4; | ||
| 866 | if (target_pbo_size > d24s8_abgr_buffer_size) { | ||
| 867 | d24s8_abgr_buffer_size = target_pbo_size * 2; | ||
| 868 | glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY); | ||
| 869 | } | ||
| 870 | |||
| 871 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 872 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, | ||
| 873 | 0); | ||
| 874 | glReadPixels(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.bottom), | ||
| 875 | static_cast<GLsizei>(src_rect.GetWidth()), | ||
| 876 | static_cast<GLsizei>(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, | ||
| 877 | 0); | ||
| 878 | |||
| 879 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | ||
| 880 | |||
| 881 | // PBO now contains src_tex in RABG format | ||
| 882 | state.draw.shader_program = d24s8_abgr_shader.handle; | ||
| 883 | state.draw.vertex_array = attributeless_vao.handle; | ||
| 884 | state.viewport.x = static_cast<GLint>(dst_rect.left); | ||
| 885 | state.viewport.y = static_cast<GLint>(dst_rect.bottom); | ||
| 886 | state.viewport.width = static_cast<GLsizei>(dst_rect.GetWidth()); | ||
| 887 | state.viewport.height = static_cast<GLsizei>(dst_rect.GetHeight()); | ||
| 888 | state.Apply(); | ||
| 889 | |||
| 890 | OGLTexture tbo; | ||
| 891 | tbo.Create(); | ||
| 892 | glActiveTexture(GL_TEXTURE0); | ||
| 893 | glBindTexture(GL_TEXTURE_BUFFER, tbo.handle); | ||
| 894 | glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle); | ||
| 895 | |||
| 896 | glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast<GLfloat>(src_rect.GetWidth()), | ||
| 897 | static_cast<GLfloat>(src_rect.GetHeight())); | ||
| 898 | glUniform4f(d24s8_abgr_viewport_u_id, static_cast<GLfloat>(state.viewport.x), | ||
| 899 | static_cast<GLfloat>(state.viewport.y), static_cast<GLfloat>(state.viewport.width), | ||
| 900 | static_cast<GLfloat>(state.viewport.height)); | ||
| 901 | |||
| 902 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); | ||
| 903 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 904 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | ||
| 905 | |||
| 906 | glBindTexture(GL_TEXTURE_BUFFER, 0); | ||
| 907 | } | ||
| 908 | |||
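The removed ConvertD24S8toABGR path reads GL_DEPTH_STENCIL pixels as GL_UNSIGNED_INT_24_8 into a PBO and lets the fragment shader reorder the reinterpreted bytes. The sketch below only illustrates the texel layout that relies on (24-bit depth in the upper bits, 8-bit stencil in the lower bits), done on the CPU for clarity; it is not the GL plumbing itself.

```cpp
// CPU-side illustration of the GL_UNSIGNED_INT_24_8 layout the removed
// conversion path depends on: depth occupies the 24 most significant bits,
// stencil the 8 least significant bits of each 32-bit texel.
#include <cstdint>
#include <cstdio>

struct DepthStencil {
    float depth;          // normalized to [0, 1]
    std::uint8_t stencil; // raw 8-bit stencil value
};

DepthStencil UnpackD24S8(std::uint32_t texel) {
    const std::uint32_t depth_bits = texel >> 8; // upper 24 bits
    const auto stencil = static_cast<std::uint8_t>(texel & 0xFF); // lower 8 bits
    return {static_cast<float>(depth_bits) / 0xFFFFFF, stencil};
}

int main() {
    // Example texel: depth = 0x123456, stencil = 0x78.
    const DepthStencil ds = UnpackD24S8(0x12345678);
    std::printf("depth=%f stencil=%u\n", ds.depth, ds.stencil);
    return 0;
}
```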
| 909 | Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, | ||
| 910 | bool load_if_create) { | ||
| 911 | if (params.addr == 0 || params.height * params.width == 0) { | ||
| 912 | return nullptr; | ||
| 913 | } | ||
| 914 | // Use GetSurfaceSubRect instead | ||
| 915 | ASSERT(params.width == params.stride); | ||
| 916 | |||
| 917 | // Check for an exact match in existing surfaces | ||
| 918 | Surface surface = | ||
| 919 | FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale); | ||
| 920 | |||
| 921 | if (surface == nullptr) { | ||
| 922 | u16 target_res_scale = params.res_scale; | ||
| 923 | if (match_res_scale != ScaleMatch::Exact) { | ||
| 924 | // This surface may have a subrect of another surface with a higher res_scale, find it | ||
| 925 | // to adjust our params | ||
| 926 | SurfaceParams find_params = params; | ||
| 927 | Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>( | ||
| 928 | surface_cache, find_params, match_res_scale); | ||
| 929 | if (expandable != nullptr && expandable->res_scale > target_res_scale) { | ||
| 930 | target_res_scale = expandable->res_scale; | ||
| 931 | } | ||
| 932 | } | ||
| 933 | SurfaceParams new_params = params; | ||
| 934 | new_params.res_scale = target_res_scale; | ||
| 935 | surface = CreateSurface(new_params); | ||
| 936 | RegisterSurface(surface); | ||
| 937 | } | ||
| 938 | |||
| 939 | if (load_if_create) { | ||
| 940 | ValidateSurface(surface, params.addr, params.size); | ||
| 941 | } | ||
| 942 | |||
| 943 | return surface; | ||
| 944 | } | ||
| 945 | |||
| 946 | boost::optional<Tegra::GPUVAddr> RasterizerCacheOpenGL::TryFindFramebufferGpuAddress( | ||
| 947 | VAddr cpu_addr) const { | ||
| 948 | // Tries to find the GPU address of a framebuffer based on the CPU address. This is because | ||
| 949 | // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU | ||
| 950 | // addresses. We iterate through all cached framebuffers, and compare their starting CPU address | ||
| 951 | // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps | ||
| 952 | // surfaces. | ||
| 953 | |||
| 954 | std::vector<Tegra::GPUVAddr> gpu_addresses; | ||
| 955 | for (const auto& pair : surface_cache) { | ||
| 956 | for (const auto& surface : pair.second) { | ||
| 957 | const VAddr surface_cpu_addr = surface->GetCpuAddr(); | ||
| 958 | if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + surface->size)) { | ||
| 959 | ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); | ||
| 960 | gpu_addresses.push_back(surface->addr); | ||
| 961 | } | ||
| 962 | } | ||
| 963 | } | ||
| 964 | |||
| 965 | if (gpu_addresses.empty()) { | ||
| 966 | return {}; | ||
| 967 | } | ||
| 968 | |||
| 969 | ASSERT_MSG(gpu_addresses.size() == 1, ">1 surface is unsupported"); | ||
| 970 | return gpu_addresses[0]; | ||
| 971 | } | ||
| 972 | |||
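TryFindFramebufferGpuAddress, shown above, resolves a CPU framebuffer address back to a GPU address by scanning every cached surface and rejecting overlaps. A simplified sketch of that reverse lookup follows; SurfaceEntry and its fields are hypothetical stand-ins for the cached surface objects.

```cpp
// Sketch of the CPU -> GPU reverse lookup. SurfaceEntry is a hypothetical
// flattened view of a cached surface's CPU address, size, and GPU address.
#include <cstdint>
#include <optional>
#include <vector>

struct SurfaceEntry {
    std::uint64_t cpu_addr;
    std::uint64_t size;
    std::uint64_t gpu_addr;
};

std::optional<std::uint64_t> FindFramebufferGpuAddress(
    const std::vector<SurfaceEntry>& surfaces, std::uint64_t cpu_addr) {
    for (const auto& entry : surfaces) {
        // Like the real code, only a hit at or inside a single surface is
        // meaningful; overlapping surfaces are not supported.
        if (cpu_addr >= entry.cpu_addr && cpu_addr < entry.cpu_addr + entry.size) {
            return entry.gpu_addr;
        }
    }
    return std::nullopt;
}
```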
| 973 | SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, | ||
| 974 | ScaleMatch match_res_scale, | ||
| 975 | bool load_if_create) { | ||
| 976 | if (params.addr == 0 || params.height * params.width == 0) { | ||
| 977 | return std::make_tuple(nullptr, MathUtil::Rectangle<u32>{}); | ||
| 978 | } | ||
| 979 | |||
| 980 | // Attempt to find encompassing surface | ||
| 981 | Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, | ||
| 982 | match_res_scale); | ||
| 983 | |||
| 984 | // Check if FindMatch failed because of res scaling | ||
| 985 | // If that's the case create a new surface with | ||
| 986 | // the dimensions of the lower res_scale surface | ||
| 987 | // to suggest it should not be used again | ||
| 988 | if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { | ||
| 989 | surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, | ||
| 990 | ScaleMatch::Ignore); | ||
| 991 | if (surface != nullptr) { | ||
| 992 | ASSERT(surface->res_scale < params.res_scale); | ||
| 993 | SurfaceParams new_params = *surface; | ||
| 994 | new_params.res_scale = params.res_scale; | ||
| 995 | |||
| 996 | surface = CreateSurface(new_params); | ||
| 997 | RegisterSurface(surface); | ||
| 998 | } | ||
| 999 | } | ||
| 1000 | |||
| 1001 | SurfaceParams aligned_params = params; | ||
| 1002 | if (params.is_tiled) { | ||
| 1003 | aligned_params.height = Common::AlignUp(params.height, 8); | ||
| 1004 | aligned_params.width = Common::AlignUp(params.width, 8); | ||
| 1005 | aligned_params.stride = Common::AlignUp(params.stride, 8); | ||
| 1006 | aligned_params.UpdateParams(); | ||
| 1007 | } | ||
| 1008 | |||
| 1009 | // Check for a surface we can expand before creating a new one | ||
| 1010 | if (surface == nullptr) { | ||
| 1011 | surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params, | ||
| 1012 | match_res_scale); | ||
| 1013 | if (surface != nullptr) { | ||
| 1014 | aligned_params.width = aligned_params.stride; | ||
| 1015 | aligned_params.UpdateParams(); | ||
| 1016 | |||
| 1017 | SurfaceParams new_params = *surface; | ||
| 1018 | new_params.addr = std::min(aligned_params.addr, surface->addr); | ||
| 1019 | new_params.end = std::max(aligned_params.end, surface->end); | ||
| 1020 | new_params.size = new_params.end - new_params.addr; | ||
| 1021 | new_params.height = static_cast<u32>( | ||
| 1022 | new_params.size / aligned_params.BytesInPixels(aligned_params.stride)); | ||
| 1023 | ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); | ||
| 1024 | |||
| 1025 | Surface new_surface = CreateSurface(new_params); | ||
| 1026 | DuplicateSurface(surface, new_surface); | ||
| 1027 | |||
| 1028 | // Delete the expanded surface, this can't be done safely yet | ||
| 1029 | // because it may still be in use | ||
| 1030 | remove_surfaces.emplace(surface); | ||
| 1031 | |||
| 1032 | surface = new_surface; | ||
| 1033 | RegisterSurface(new_surface); | ||
| 1034 | } | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | // No subrect found - create and return a new surface | ||
| 1038 | if (surface == nullptr) { | ||
| 1039 | SurfaceParams new_params = aligned_params; | ||
| 1040 | // Can't have gaps in a surface | ||
| 1041 | new_params.width = aligned_params.stride; | ||
| 1042 | new_params.UpdateParams(); | ||
| 1043 | // GetSurface will create the new surface and possibly adjust res_scale if necessary | ||
| 1044 | surface = GetSurface(new_params, match_res_scale, load_if_create); | ||
| 1045 | } else if (load_if_create) { | ||
| 1046 | ValidateSurface(surface, aligned_params.addr, aligned_params.size); | ||
| 1047 | } | ||
| 1048 | |||
| 1049 | return std::make_tuple(surface, surface->GetScaledSubRect(params)); | ||
| 1050 | } | 333 | } |
| 1051 | 334 | ||
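The removed expansion path in GetSurfaceSubRect merges an existing surface with the requested region by taking the lower start address and the higher end address, then recomputing the height from the merged byte size and the row pitch. A small sketch of that arithmetic, with hypothetical values in the usage comment:

```cpp
// Sketch of the "expand" size arithmetic: the merged surface spans
// min(addr)..max(end) and its height is the merged size in whole rows.
#include <algorithm>
#include <cassert>
#include <cstdint>

struct Region {
    std::uint64_t addr;
    std::uint64_t end; // one past the last byte
};

std::uint32_t MergedHeight(Region a, Region b, std::uint64_t bytes_per_row) {
    const std::uint64_t addr = std::min(a.addr, b.addr);
    const std::uint64_t end = std::max(a.end, b.end);
    const std::uint64_t size = end - addr;
    assert(size % bytes_per_row == 0 && "merged region must be whole rows");
    return static_cast<std::uint32_t>(size / bytes_per_row);
}

// Example (hypothetical values):
// MergedHeight({0x1000, 0x3000}, {0x2000, 0x5000}, 0x100) == 64
```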
| 1052 | Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { | 335 | Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { |
| @@ -1056,36 +339,21 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu | |||
| 1056 | params.addr = config.tic.Address(); | 339 | params.addr = config.tic.Address(); |
| 1057 | params.is_tiled = config.tic.IsTiled(); | 340 | params.is_tiled = config.tic.IsTiled(); |
| 1058 | params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); | 341 | params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); |
| 342 | params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); | ||
| 343 | params.type = SurfaceParams::GetFormatType(params.pixel_format); | ||
| 344 | params.width = Common::AlignUp(config.tic.Width(), params.GetCompressionFactor()); | ||
| 345 | params.height = Common::AlignUp(config.tic.Height(), params.GetCompressionFactor()); | ||
| 1059 | 346 | ||
| 1060 | params.width = Common::AlignUp(config.tic.Width(), params.GetCompresssionFactor()) / | 347 | if (params.is_tiled) { |
| 1061 | params.GetCompresssionFactor(); | 348 | params.block_height = config.tic.BlockHeight(); |
| 1062 | params.height = Common::AlignUp(config.tic.Height(), params.GetCompresssionFactor()) / | 349 | } |
| 1063 | params.GetCompresssionFactor(); | ||
| 1064 | 350 | ||
| 1065 | // TODO(Subv): Different types per component are not supported. | 351 | // TODO(Subv): Different types per component are not supported. |
| 1066 | ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && | 352 | ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && |
| 1067 | config.tic.r_type.Value() == config.tic.b_type.Value() && | 353 | config.tic.r_type.Value() == config.tic.b_type.Value() && |
| 1068 | config.tic.r_type.Value() == config.tic.a_type.Value()); | 354 | config.tic.r_type.Value() == config.tic.a_type.Value()); |
| 1069 | 355 | ||
| 1070 | params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); | 356 | return GetSurface(params); |
| 1071 | |||
| 1072 | if (config.tic.IsTiled()) { | ||
| 1073 | params.block_height = config.tic.BlockHeight(); | ||
| 1074 | |||
| 1075 | // TODO(bunnei): The below align up is a hack. This is here because some compressed textures | ||
| 1076 | // are not a multiple of their own compression factor, and so this accounts for that. This | ||
| 1077 | // could potentially result in an extra row of 4px being decoded if a texture is not a | ||
| 1078 | // multiple of 4. | ||
| 1079 | params.width = Common::AlignUp(params.width, 4); | ||
| 1080 | params.height = Common::AlignUp(params.height, 4); | ||
| 1081 | } else { | ||
| 1082 | // Use the texture-provided stride value if the texture isn't tiled. | ||
| 1083 | params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch())); | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | params.UpdateParams(); | ||
| 1087 | |||
| 1088 | return GetSurface(params, ScaleMatch::Ignore, true); | ||
| 1089 | } | 357 | } |
| 1090 | 358 | ||
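The new GetTextureSurface rounds the TIC width and height up to the format's compression factor so block-compressed surfaces always cover whole blocks. A minimal illustration of that align-up step; the AlignUp helper below is a local stand-in mirroring the intent of Common::AlignUp.

```cpp
// Block-compressed formats only exist in whole blocks, so requested
// dimensions are rounded up to a multiple of the compression factor.
#include <cstdint>

constexpr std::uint32_t AlignUp(std::uint32_t value, std::uint32_t alignment) {
    return ((value + alignment - 1) / alignment) * alignment;
}

static_assert(AlignUp(30, 4) == 32, "30px rounds up to one extra 4px block");
static_assert(AlignUp(32, 4) == 32, "block-aligned dimensions are unchanged");
```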
| 1091 | SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( | 359 | SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( |
| @@ -1096,17 +364,6 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( | |||
| 1096 | // TODO(bunnei): This is hard corded to use just the first render buffer | 364 | // TODO(bunnei): This is hard corded to use just the first render buffer |
| 1097 | NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); | 365 | NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); |
| 1098 | 366 | ||
| 1099 | // update resolution_scale_factor and reset cache if changed | ||
| 1100 | // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We | ||
| 1101 | // need to fix this before making the renderer multi-threaded. | ||
| 1102 | static u16 resolution_scale_factor = GetResolutionScaleFactor(); | ||
| 1103 | if (resolution_scale_factor != GetResolutionScaleFactor()) { | ||
| 1104 | resolution_scale_factor = GetResolutionScaleFactor(); | ||
| 1105 | FlushAll(); | ||
| 1106 | while (!surface_cache.empty()) | ||
| 1107 | UnregisterSurface(*surface_cache.begin()->second.begin()); | ||
| 1108 | } | ||
| 1109 | |||
| 1110 | MathUtil::Rectangle<u32> viewport_clamped{ | 367 | MathUtil::Rectangle<u32> viewport_clamped{ |
| 1111 | static_cast<u32>(std::clamp(viewport.left, 0, static_cast<s32>(config.width))), | 368 | static_cast<u32>(std::clamp(viewport.left, 0, static_cast<s32>(config.width))), |
| 1112 | static_cast<u32>(std::clamp(viewport.top, 0, static_cast<s32>(config.height))), | 369 | static_cast<u32>(std::clamp(viewport.top, 0, static_cast<s32>(config.height))), |
| @@ -1116,7 +373,6 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( | |||
| 1116 | // get color and depth surfaces | 373 | // get color and depth surfaces |
| 1117 | SurfaceParams color_params; | 374 | SurfaceParams color_params; |
| 1118 | color_params.is_tiled = true; | 375 | color_params.is_tiled = true; |
| 1119 | color_params.res_scale = resolution_scale_factor; | ||
| 1120 | color_params.width = config.width; | 376 | color_params.width = config.width; |
| 1121 | color_params.height = config.height; | 377 | color_params.height = config.height; |
| 1122 | // TODO(Subv): Can framebuffers use a different block height? | 378 | // TODO(Subv): Can framebuffers use a different block height? |
| @@ -1126,319 +382,69 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( | |||
| 1126 | color_params.addr = config.Address(); | 382 | color_params.addr = config.Address(); |
| 1127 | color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); | 383 | color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); |
| 1128 | color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); | 384 | color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); |
| 1129 | color_params.UpdateParams(); | 385 | color_params.type = SurfaceParams::GetFormatType(color_params.pixel_format); |
| 1130 | 386 | ||
| 1131 | ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); | 387 | ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); |
| 1132 | // depth_params.addr = config.GetDepthBufferPhysicalAddress(); | ||
| 1133 | // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); | ||
| 1134 | // depth_params.UpdateParams(); | ||
| 1135 | |||
| 1136 | auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); | ||
| 1137 | auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); | ||
| 1138 | |||
| 1139 | // Make sure that framebuffers don't overlap if both color and depth are being used | ||
| 1140 | if (using_color_fb && using_depth_fb && | ||
| 1141 | boost::icl::length(color_vp_interval & depth_vp_interval)) { | ||
| 1142 | NGLOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " | ||
| 1143 | "overlapping framebuffers not supported!"); | ||
| 1144 | using_depth_fb = false; | ||
| 1145 | } | ||
| 1146 | 388 | ||
| 1147 | MathUtil::Rectangle<u32> color_rect{}; | 389 | MathUtil::Rectangle<u32> color_rect{}; |
| 1148 | Surface color_surface = nullptr; | 390 | Surface color_surface; |
| 1149 | if (using_color_fb) | 391 | if (using_color_fb) { |
| 1150 | std::tie(color_surface, color_rect) = | 392 | color_surface = GetSurface(color_params); |
| 1151 | GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); | 393 | color_rect = color_surface->GetSurfaceParams().GetRect(); |
| 394 | } | ||
| 1152 | 395 | ||
| 1153 | MathUtil::Rectangle<u32> depth_rect{}; | 396 | MathUtil::Rectangle<u32> depth_rect{}; |
| 1154 | Surface depth_surface = nullptr; | 397 | Surface depth_surface; |
| 1155 | if (using_depth_fb) | 398 | if (using_depth_fb) { |
| 1156 | std::tie(depth_surface, depth_rect) = | 399 | depth_surface = GetSurface(depth_params); |
| 1157 | GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); | 400 | depth_rect = depth_surface->GetSurfaceParams().GetRect(); |
| 401 | } | ||
| 1158 | 402 | ||
| 1159 | MathUtil::Rectangle<u32> fb_rect{}; | 403 | MathUtil::Rectangle<u32> fb_rect{}; |
| 1160 | if (color_surface != nullptr && depth_surface != nullptr) { | 404 | if (color_surface && depth_surface) { |
| 1161 | fb_rect = color_rect; | 405 | fb_rect = color_rect; |
| 1162 | // Color and Depth surfaces must have the same dimensions and offsets | 406 | // Color and Depth surfaces must have the same dimensions and offsets |
| 1163 | if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || | 407 | if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || |
| 1164 | color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { | 408 | color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { |
| 1165 | color_surface = GetSurface(color_params, ScaleMatch::Exact, false); | 409 | color_surface = GetSurface(color_params); |
| 1166 | depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); | 410 | depth_surface = GetSurface(depth_params); |
| 1167 | fb_rect = color_surface->GetScaledRect(); | 411 | fb_rect = color_surface->GetSurfaceParams().GetRect(); |
| 1168 | } | 412 | } |
| 1169 | } else if (color_surface != nullptr) { | 413 | } else if (color_surface) { |
| 1170 | fb_rect = color_rect; | 414 | fb_rect = color_rect; |
| 1171 | } else if (depth_surface != nullptr) { | 415 | } else if (depth_surface) { |
| 1172 | fb_rect = depth_rect; | 416 | fb_rect = depth_rect; |
| 1173 | } | 417 | } |
| 1174 | 418 | ||
| 1175 | if (color_surface != nullptr) { | ||
| 1176 | ValidateSurface(color_surface, boost::icl::first(color_vp_interval), | ||
| 1177 | boost::icl::length(color_vp_interval)); | ||
| 1178 | } | ||
| 1179 | if (depth_surface != nullptr) { | ||
| 1180 | ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), | ||
| 1181 | boost::icl::length(depth_vp_interval)); | ||
| 1182 | } | ||
| 1183 | |||
| 1184 | return std::make_tuple(color_surface, depth_surface, fb_rect); | 419 | return std::make_tuple(color_surface, depth_surface, fb_rect); |
| 1185 | } | 420 | } |
| 1186 | 421 | ||
| 1187 | Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { | 422 | void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { |
| 1188 | UNREACHABLE(); | 423 | surface->LoadGLBuffer(); |
| 1189 | return {}; | 424 | surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); |
| 1190 | } | ||
| 1191 | |||
| 1192 | SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { | ||
| 1193 | MathUtil::Rectangle<u32> rect{}; | ||
| 1194 | |||
| 1195 | Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>( | ||
| 1196 | surface_cache, params, ScaleMatch::Ignore); | ||
| 1197 | |||
| 1198 | if (match_surface != nullptr) { | ||
| 1199 | ValidateSurface(match_surface, params.addr, params.size); | ||
| 1200 | |||
| 1201 | SurfaceParams match_subrect; | ||
| 1202 | if (params.width != params.stride) { | ||
| 1203 | const u32 tiled_size = match_surface->is_tiled ? 8 : 1; | ||
| 1204 | match_subrect = params; | ||
| 1205 | match_subrect.width = | ||
| 1206 | static_cast<u32>(match_surface->PixelsInBytes(params.width) / tiled_size); | ||
| 1207 | match_subrect.stride = | ||
| 1208 | static_cast<u32>(match_surface->PixelsInBytes(params.stride) / tiled_size); | ||
| 1209 | match_subrect.height *= tiled_size; | ||
| 1210 | } else { | ||
| 1211 | match_subrect = match_surface->FromInterval(params.GetInterval()); | ||
| 1212 | ASSERT(match_subrect.GetInterval() == params.GetInterval()); | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | rect = match_surface->GetScaledSubRect(match_subrect); | ||
| 1216 | } | ||
| 1217 | |||
| 1218 | return std::make_tuple(match_surface, rect); | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, | ||
| 1222 | const Surface& dest_surface) { | ||
| 1223 | ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); | ||
| 1224 | |||
| 1225 | BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, | ||
| 1226 | dest_surface->GetScaledSubRect(*src_surface)); | ||
| 1227 | |||
| 1228 | dest_surface->invalid_regions -= src_surface->GetInterval(); | ||
| 1229 | dest_surface->invalid_regions += src_surface->invalid_regions; | ||
| 1230 | |||
| 1231 | SurfaceRegions regions; | ||
| 1232 | for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { | ||
| 1233 | if (pair.second == src_surface) { | ||
| 1234 | regions += pair.first; | ||
| 1235 | } | ||
| 1236 | } | ||
| 1237 | for (auto& interval : regions) { | ||
| 1238 | dirty_regions.set({interval, dest_surface}); | ||
| 1239 | } | ||
| 1240 | } | ||
| 1241 | |||
| 1242 | void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, | ||
| 1243 | u64 size) { | ||
| 1244 | if (size == 0) | ||
| 1245 | return; | ||
| 1246 | |||
| 1247 | const SurfaceInterval validate_interval(addr, addr + size); | ||
| 1248 | |||
| 1249 | if (surface->type == SurfaceType::Fill) { | ||
| 1250 | // Sanity check, fill surfaces will always be valid when used | ||
| 1251 | ASSERT(surface->IsRegionValid(validate_interval)); | ||
| 1252 | return; | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | while (true) { | ||
| 1256 | const auto it = surface->invalid_regions.find(validate_interval); | ||
| 1257 | if (it == surface->invalid_regions.end()) | ||
| 1258 | break; | ||
| 1259 | |||
| 1260 | const auto interval = *it & validate_interval; | ||
| 1261 | // Look for a valid surface to copy from | ||
| 1262 | SurfaceParams params = *surface; | ||
| 1263 | |||
| 1264 | Surface copy_surface = | ||
| 1265 | FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval); | ||
| 1266 | if (copy_surface != nullptr) { | ||
| 1267 | SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); | ||
| 1268 | CopySurface(copy_surface, surface, copy_interval); | ||
| 1269 | surface->invalid_regions.erase(copy_interval); | ||
| 1270 | continue; | ||
| 1271 | } | ||
| 1272 | |||
| 1273 | // Load data from Switch memory | ||
| 1274 | FlushRegion(params.addr, params.size); | ||
| 1275 | surface->LoadGLBuffer(params.addr, params.end); | ||
| 1276 | surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, | ||
| 1277 | draw_framebuffer.handle); | ||
| 1278 | surface->invalid_regions.erase(params.GetInterval()); | ||
| 1279 | } | ||
| 1280 | } | ||
| 1281 | |||
| 1282 | void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) { | ||
| 1283 | if (size == 0) | ||
| 1284 | return; | ||
| 1285 | |||
| 1286 | const SurfaceInterval flush_interval(addr, addr + size); | ||
| 1287 | SurfaceRegions flushed_intervals; | ||
| 1288 | |||
| 1289 | for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { | ||
| 1290 | // small sizes imply that this most likely comes from the cpu, flush the entire region | ||
| 1291 | // the point is to avoid thousands of small writes every frame if the cpu decides to access | ||
| 1292 | // that region, anything higher than 8 you're guaranteed it comes from a service | ||
| 1293 | const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; | ||
| 1294 | auto& surface = pair.second; | ||
| 1295 | |||
| 1296 | if (flush_surface != nullptr && surface != flush_surface) | ||
| 1297 | continue; | ||
| 1298 | |||
| 1299 | // Sanity check, this surface is the last one that marked this region dirty | ||
| 1300 | ASSERT(surface->IsRegionValid(interval)); | ||
| 1301 | |||
| 1302 | if (surface->type != SurfaceType::Fill) { | ||
| 1303 | SurfaceParams params = surface->FromInterval(interval); | ||
| 1304 | surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, | ||
| 1305 | draw_framebuffer.handle); | ||
| 1306 | } | ||
| 1307 | surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); | ||
| 1308 | flushed_intervals += interval; | ||
| 1309 | } | ||
| 1310 | // Reset dirty regions | ||
| 1311 | dirty_regions -= flushed_intervals; | ||
| 1312 | } | 425 | } |
| 1313 | 426 | ||
| 1314 | void RasterizerCacheOpenGL::FlushAll() { | 427 | void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) { |
| 1315 | FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); | 428 | surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); |
| 429 | surface->FlushGLBuffer(); | ||
| 1316 | } | 430 | } |
| 1317 | 431 | ||
| 1318 | void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size, | 432 | Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { |
| 1319 | const Surface& region_owner) { | 433 | if (params.addr == 0 || params.height * params.width == 0) { |
| 1320 | if (size == 0) | 434 | return {}; |
| 1321 | return; | ||
| 1322 | |||
| 1323 | const SurfaceInterval invalid_interval(addr, addr + size); | ||
| 1324 | |||
| 1325 | if (region_owner != nullptr) { | ||
| 1326 | ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); | ||
| 1327 | // Surfaces can't have a gap | ||
| 1328 | ASSERT(region_owner->width == region_owner->stride); | ||
| 1329 | region_owner->invalid_regions.erase(invalid_interval); | ||
| 1330 | } | ||
| 1331 | |||
| 1332 | for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { | ||
| 1333 | for (auto& cached_surface : pair.second) { | ||
| 1334 | if (cached_surface == region_owner) | ||
| 1335 | continue; | ||
| 1336 | |||
| 1337 | // If cpu is invalidating this region we want to remove it | ||
| 1338 | // to (likely) mark the memory pages as uncached | ||
| 1339 | if (region_owner == nullptr && size <= 8) { | ||
| 1340 | FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); | ||
| 1341 | remove_surfaces.emplace(cached_surface); | ||
| 1342 | continue; | ||
| 1343 | } | ||
| 1344 | |||
| 1345 | const auto interval = cached_surface->GetInterval() & invalid_interval; | ||
| 1346 | cached_surface->invalid_regions.insert(interval); | ||
| 1347 | |||
| 1348 | // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures | ||
| 1349 | if (cached_surface->type == SurfaceType::Fill && | ||
| 1350 | cached_surface->IsSurfaceFullyInvalid()) { | ||
| 1351 | remove_surfaces.emplace(cached_surface); | ||
| 1352 | } | ||
| 1353 | } | ||
| 1354 | } | 435 | } |
| 1355 | 436 | ||
| 1356 | if (region_owner != nullptr) | 437 | // Check for an exact match in existing surfaces |
| 1357 | dirty_regions.set({invalid_interval, region_owner}); | 438 | auto search = surface_cache.find(params.addr); |
| 1358 | else | 439 | Surface surface; |
| 1359 | dirty_regions.erase(invalid_interval); | 440 | if (search != surface_cache.end()) { |
| 1360 | 441 | surface = search->second; | |
| 1361 | for (auto& remove_surface : remove_surfaces) { | 442 | } else { |
| 1362 | if (remove_surface == region_owner) { | 443 | surface = std::make_shared<CachedSurface>(params); |
| 1363 | Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>( | 444 | surface_cache[params.addr] = surface; |
| 1364 | surface_cache, *region_owner, ScaleMatch::Ignore); | ||
| 1365 | ASSERT(expanded_surface); | ||
| 1366 | |||
| 1367 | if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { | ||
| 1368 | DuplicateSurface(region_owner, expanded_surface); | ||
| 1369 | } else { | ||
| 1370 | continue; | ||
| 1371 | } | ||
| 1372 | } | ||
| 1373 | UnregisterSurface(remove_surface); | ||
| 1374 | } | 445 | } |
| 1375 | 446 | ||
| 1376 | remove_surfaces.clear(); | 447 | LoadSurface(surface); |
| 1377 | } | ||
| 1378 | |||
| 1379 | Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { | ||
| 1380 | Surface surface = std::make_shared<CachedSurface>(); | ||
| 1381 | static_cast<SurfaceParams&>(*surface) = params; | ||
| 1382 | |||
| 1383 | surface->texture.Create(); | ||
| 1384 | |||
| 1385 | surface->gl_buffer_size = 0; | ||
| 1386 | surface->invalid_regions.insert(surface->GetInterval()); | ||
| 1387 | AllocateSurfaceTexture(surface->texture.handle, | ||
| 1388 | GetFormatTuple(surface->pixel_format, surface->component_type), | ||
| 1389 | surface->GetScaledWidth(), surface->GetScaledHeight()); | ||
| 1390 | 448 | ||
| 1391 | return surface; | 449 | return surface; |
| 1392 | } | 450 | } |
| 1393 | |||
| 1394 | void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { | ||
| 1395 | if (surface->registered) { | ||
| 1396 | return; | ||
| 1397 | } | ||
| 1398 | surface->registered = true; | ||
| 1399 | surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); | ||
| 1400 | UpdatePagesCachedCount(surface->addr, surface->size, 1); | ||
| 1401 | } | ||
| 1402 | |||
| 1403 | void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { | ||
| 1404 | if (!surface->registered) { | ||
| 1405 | return; | ||
| 1406 | } | ||
| 1407 | surface->registered = false; | ||
| 1408 | UpdatePagesCachedCount(surface->addr, surface->size, -1); | ||
| 1409 | surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); | ||
| 1410 | } | ||
| 1411 | |||
| 1412 | void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { | ||
| 1413 | const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) - | ||
| 1414 | (addr >> Tegra::MemoryManager::PAGE_BITS) + 1; | ||
| 1415 | const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS; | ||
| 1416 | const u64 page_end = page_start + num_pages; | ||
| 1417 | |||
| 1418 | // Interval maps will erase segments if count reaches 0, so if delta is negative we have to | ||
| 1419 | // subtract after iterating | ||
| 1420 | const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); | ||
| 1421 | if (delta > 0) | ||
| 1422 | cached_pages.add({pages_interval, delta}); | ||
| 1423 | |||
| 1424 | for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { | ||
| 1425 | const auto interval = pair.first & pages_interval; | ||
| 1426 | const int count = pair.second; | ||
| 1427 | |||
| 1428 | const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) | ||
| 1429 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 1430 | const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) | ||
| 1431 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 1432 | const u64 interval_size = interval_end_addr - interval_start_addr; | ||
| 1433 | |||
| 1434 | if (delta > 0 && count == delta) | ||
| 1435 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); | ||
| 1436 | else if (delta < 0 && count == -delta) | ||
| 1437 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); | ||
| 1438 | else | ||
| 1439 | ASSERT(count >= 0); | ||
| 1440 | } | ||
| 1441 | |||
| 1442 | if (delta < 0) | ||
| 1443 | cached_pages.add({pages_interval, delta}); | ||
| 1444 | } | ||
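The removed UpdatePagesCachedCount depends on a Boost.ICL property its comment calls out: an interval_map erases segments whose combined value reaches the identity, so negative deltas must only be applied after iterating. Assuming Boost.ICL is available, the following demonstrates that behaviour:

```cpp
// Adding a negative delta that brings a segment's count to zero removes the
// segment from the map entirely. Requires Boost.ICL (header-only).
#include <cassert>
#include <cstdint>
#include <utility>
#include <boost/icl/interval_map.hpp>

int main() {
    using PageMap = boost::icl::interval_map<std::uint64_t, int>;
    PageMap cached_pages;

    const auto pages = boost::icl::interval<std::uint64_t>::right_open(0, 16);
    cached_pages += std::make_pair(pages, 1); // register 16 pages once
    assert(cached_pages.iterative_size() == 1);

    cached_pages += std::make_pair(pages, -1); // unregister them again
    // The zero-count segment has been absorbed; iterating now visits nothing,
    // which is why the removed code subtracted only after it finished iterating.
    assert(cached_pages.iterative_size() == 0);
    return 0;
}
```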
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9da945e19..ca9945df4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -1,57 +1,22 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | 1 | // Copyright 2018 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <map> | ||
| 8 | #include <memory> | 9 | #include <memory> |
| 9 | #include <set> | 10 | |
| 10 | #include <tuple> | ||
| 11 | #ifdef __GNUC__ | ||
| 12 | #pragma GCC diagnostic push | ||
| 13 | #pragma GCC diagnostic ignored "-Wunused-local-typedefs" | ||
| 14 | #endif | ||
| 15 | #include <boost/icl/interval_map.hpp> | ||
| 16 | #include <boost/icl/interval_set.hpp> | ||
| 17 | #ifdef __GNUC__ | ||
| 18 | #pragma GCC diagnostic pop | ||
| 19 | #endif | ||
| 20 | #include <boost/optional.hpp> | ||
| 21 | #include <glad/glad.h> | ||
| 22 | #include "common/assert.h" | ||
| 23 | #include "common/common_funcs.h" | ||
| 24 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 25 | #include "common/math_util.h" | 12 | #include "common/math_util.h" |
| 26 | #include "video_core/gpu.h" | ||
| 27 | #include "video_core/memory_manager.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 29 | #include "video_core/textures/texture.h" | 14 | #include "video_core/textures/texture.h" |
| 30 | 15 | ||
| 31 | struct CachedSurface; | 16 | class CachedSurface; |
| 32 | using Surface = std::shared_ptr<CachedSurface>; | 17 | using Surface = std::shared_ptr<CachedSurface>; |
| 33 | using SurfaceSet = std::set<Surface>; | ||
| 34 | |||
| 35 | using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>; | ||
| 36 | using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>; | ||
| 37 | using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>; | ||
| 38 | |||
| 39 | using SurfaceInterval = SurfaceCache::interval_type; | ||
| 40 | static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && | ||
| 41 | std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(), | ||
| 42 | "incorrect interval types"); | ||
| 43 | |||
| 44 | using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>; | ||
| 45 | using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; | 18 | using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; |
| 46 | 19 | ||
| 47 | using PageMap = boost::icl::interval_map<u64, int>; | ||
| 48 | |||
| 49 | enum class ScaleMatch { | ||
| 50 | Exact, // only accept same res scale | ||
| 51 | Upscale, // only allow higher scale than params | ||
| 52 | Ignore // accept every scaled res | ||
| 53 | }; | ||
| 54 | |||
| 55 | struct SurfaceParams { | 20 | struct SurfaceParams { |
| 56 | enum class PixelFormat { | 21 | enum class PixelFormat { |
| 57 | ABGR8 = 0, | 22 | ABGR8 = 0, |
| @@ -93,10 +58,10 @@ struct SurfaceParams { | |||
| 93 | /** | 58 | /** |
| 94 | * Gets the compression factor for the specified PixelFormat. This applies to just the | 59 | * Gets the compression factor for the specified PixelFormat. This applies to just the |
| 95 | * "compressed width" and "compressed height", not the overall compression factor of a | 60 | * "compressed width" and "compressed height", not the overall compression factor of a |
| 96 | * compressed image. This is used for maintaining proper surface sizes for compressed texture | 61 | * compressed image. This is used for maintaining proper surface sizes for compressed |
| 97 | * formats. | 62 | * texture formats. |
| 98 | */ | 63 | */ |
| 99 | static constexpr u32 GetCompresssionFactor(PixelFormat format) { | 64 | static constexpr u32 GetCompressionFactor(PixelFormat format) { |
| 100 | if (format == PixelFormat::Invalid) | 65 | if (format == PixelFormat::Invalid) |
| 101 | return 0; | 66 | return 0; |
| 102 | 67 | ||
| @@ -118,8 +83,8 @@ struct SurfaceParams { | |||
| 118 | ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); | 83 | ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); |
| 119 | return compression_factor_table[static_cast<size_t>(format)]; | 84 | return compression_factor_table[static_cast<size_t>(format)]; |
| 120 | } | 85 | } |
| 121 | u32 GetCompresssionFactor() const { | 86 | u32 GetCompressionFactor() const { |
| 122 | return GetCompresssionFactor(pixel_format); | 87 | return GetCompressionFactor(pixel_format); |
| 123 | } | 88 | } |
| 124 | 89 | ||
| 125 | static constexpr u32 GetFormatBpp(PixelFormat format) { | 90 | static constexpr u32 GetFormatBpp(PixelFormat format) { |
| @@ -165,25 +130,6 @@ struct SurfaceParams { | |||
| 165 | } | 130 | } |
| 166 | } | 131 | } |
| 167 | 132 | ||
| 168 | static bool IsFormatASTC(PixelFormat format) { | ||
| 169 | switch (format) { | ||
| 170 | case PixelFormat::ASTC_2D_4X4: | ||
| 171 | return true; | ||
| 172 | default: | ||
| 173 | return false; | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { | ||
| 178 | switch (format) { | ||
| 179 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: | ||
| 180 | return PixelFormat::ABGR8; | ||
| 181 | default: | ||
| 182 | NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); | ||
| 183 | UNREACHABLE(); | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format) { | 133 | static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format) { |
| 188 | // TODO(Subv): Properly implement this | 134 | // TODO(Subv): Properly implement this |
| 189 | switch (format) { | 135 | switch (format) { |
| @@ -276,36 +222,6 @@ struct SurfaceParams { | |||
| 276 | } | 222 | } |
| 277 | } | 223 | } |
| 278 | 224 | ||
| 279 | static ComponentType ComponentTypeFromGPUPixelFormat( | ||
| 280 | Tegra::FramebufferConfig::PixelFormat format) { | ||
| 281 | switch (format) { | ||
| 282 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: | ||
| 283 | return ComponentType::UNorm; | ||
| 284 | default: | ||
| 285 | NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); | ||
| 286 | UNREACHABLE(); | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { | ||
| 291 | SurfaceType a_type = GetFormatType(pixel_format_a); | ||
| 292 | SurfaceType b_type = GetFormatType(pixel_format_b); | ||
| 293 | |||
| 294 | if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) { | ||
| 295 | return true; | ||
| 296 | } | ||
| 297 | |||
| 298 | if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { | ||
| 299 | return true; | ||
| 300 | } | ||
| 301 | |||
| 302 | if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { | ||
| 303 | return true; | ||
| 304 | } | ||
| 305 | |||
| 306 | return false; | ||
| 307 | } | ||
| 308 | |||
| 309 | static SurfaceType GetFormatType(PixelFormat pixel_format) { | 225 | static SurfaceType GetFormatType(PixelFormat pixel_format) { |
| 310 | if (static_cast<size_t>(pixel_format) < MaxPixelFormat) { | 226 | if (static_cast<size_t>(pixel_format) < MaxPixelFormat) { |
| 311 | return SurfaceType::ColorTexture; | 227 | return SurfaceType::ColorTexture; |
| @@ -317,220 +233,79 @@ struct SurfaceParams { | |||
| 317 | return SurfaceType::Invalid; | 233 | return SurfaceType::Invalid; |
| 318 | } | 234 | } |
| 319 | 235 | ||
| 320 | /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" | ||
| 321 | /// and "pixel_format" | ||
| 322 | void UpdateParams() { | ||
| 323 | if (stride == 0) { | ||
| 324 | stride = width; | ||
| 325 | } | ||
| 326 | type = GetFormatType(pixel_format); | ||
| 327 | size = !is_tiled ? BytesInPixels(stride * (height - 1) + width) | ||
| 328 | : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); | ||
| 329 | end = addr + size; | ||
| 330 | } | ||
| 331 | |||
| 332 | SurfaceInterval GetInterval() const { | ||
| 333 | return SurfaceInterval::right_open(addr, end); | ||
| 334 | } | ||
| 335 | |||
| 336 | // Returns the outer rectangle containing "interval" | ||
| 337 | SurfaceParams FromInterval(SurfaceInterval interval) const; | ||
| 338 | |||
| 339 | SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const; | ||
| 340 | |||
| 341 | // Returns the region of the biggest valid rectange within interval | ||
| 342 | SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; | ||
| 343 | |||
| 344 | /** | ||
| 345 | * Gets the actual width (in pixels) of the surface. This is provided because `width` is used | ||
| 346 | * for tracking the surface region in memory, which may be compressed for certain formats. In | ||
| 347 | * this scenario, `width` is actually the compressed width. | ||
| 348 | */ | ||
| 349 | u32 GetActualWidth() const { | ||
| 350 | return width * GetCompresssionFactor(); | ||
| 351 | } | ||
| 352 | |||
| 353 | /** | ||
| 354 | * Gets the actual height (in pixels) of the surface. This is provided because `height` is used | ||
| 355 | * for tracking the surface region in memory, which may be compressed for certain formats. In | ||
| 356 | * this scenario, `height` is actually the compressed height. | ||
| 357 | */ | ||
| 358 | u32 GetActualHeight() const { | ||
| 359 | return height * GetCompresssionFactor(); | ||
| 360 | } | ||
| 361 | |||
| 362 | u32 GetScaledWidth() const { | ||
| 363 | return width * res_scale; | ||
| 364 | } | ||
| 365 | |||
| 366 | u32 GetScaledHeight() const { | ||
| 367 | return height * res_scale; | ||
| 368 | } | ||
| 369 | |||
| 370 | MathUtil::Rectangle<u32> GetRect() const { | 236 | MathUtil::Rectangle<u32> GetRect() const { |
| 371 | return {0, height, width, 0}; | 237 | return {0, height, width, 0}; |
| 372 | } | 238 | } |
| 373 | 239 | ||
| 374 | MathUtil::Rectangle<u32> GetScaledRect() const { | 240 | size_t SizeInBytes() const { |
| 375 | return {0, GetScaledHeight(), GetScaledWidth(), 0}; | 241 | const u32 compression_factor{GetCompressionFactor()}; |
| 376 | } | 242 | ASSERT(width % compression_factor == 0); |
| 377 | 243 | ASSERT(height % compression_factor == 0); | |
| 378 | u64 PixelsInBytes(u64 size) const { | 244 | return (width / compression_factor) * (height / compression_factor) * |
| 379 | return size * CHAR_BIT / GetFormatBpp(pixel_format); | 245 | GetFormatBpp(pixel_format) / CHAR_BIT; |
| 380 | } | ||
| 381 | |||
| 382 | u64 BytesInPixels(u64 pixels) const { | ||
| 383 | return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; | ||
| 384 | } | 246 | } |
| 385 | 247 | ||
| 386 | VAddr GetCpuAddr() const; | 248 | VAddr GetCpuAddr() const; |
| 387 | 249 | ||
| 388 | bool ExactMatch(const SurfaceParams& other_surface) const; | 250 | Tegra::GPUVAddr addr; |
| 389 | bool CanSubRect(const SurfaceParams& sub_surface) const; | 251 | u32 width; |
| 390 | bool CanExpand(const SurfaceParams& expanded_surface) const; | 252 | u32 height; |
| 391 | bool CanTexCopy(const SurfaceParams& texcopy_params) const; | 253 | u32 block_height; |
| 392 | 254 | bool is_tiled; | |
| 393 | MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; | 255 | PixelFormat pixel_format; |
| 394 | MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; | 256 | SurfaceType type; |
| 395 | 257 | ComponentType component_type; | |
| 396 | Tegra::GPUVAddr addr = 0; | ||
| 397 | Tegra::GPUVAddr end = 0; | ||
| 398 | boost::optional<VAddr> cpu_addr; | ||
| 399 | u64 size = 0; | ||
| 400 | |||
| 401 | u32 width = 0; | ||
| 402 | u32 height = 0; | ||
| 403 | u32 stride = 0; | ||
| 404 | u32 block_height = 0; | ||
| 405 | u16 res_scale = 1; | ||
| 406 | |||
| 407 | bool is_tiled = false; | ||
| 408 | PixelFormat pixel_format = PixelFormat::Invalid; | ||
| 409 | SurfaceType type = SurfaceType::Invalid; | ||
| 410 | ComponentType component_type = ComponentType::Invalid; | ||
| 411 | }; | 258 | }; |
| 259 | static_assert(std::is_pod<SurfaceParams>::value, "SurfaceParams is not POD"); | ||
| 412 | 260 | ||
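The new SizeInBytes() helper above divides the block-aligned dimensions by the compression factor before applying the per-format bit count. A worked example with explicitly hypothetical numbers (a 64x64 surface in a 4x4-block format counted at 64 bits per block unit):

```cpp
// Worked example of the SizeInBytes() formula; the bit count per block unit
// is a hypothetical table entry, not a value taken from the format table.
constexpr unsigned width = 64, height = 64;
constexpr unsigned compression_factor = 4; // 4x4 blocks
constexpr unsigned bits_per_unit = 64;     // hypothetical GetFormatBpp() entry
constexpr unsigned size_in_bytes =
    (width / compression_factor) * (height / compression_factor) * bits_per_unit / 8;
static_assert(size_in_bytes == 2048, "16 x 16 blocks x 8 bytes per block");
```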
| 413 | struct CachedSurface : SurfaceParams { | 261 | class CachedSurface final { |
| 414 | bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; | 262 | public: |
| 415 | bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; | 263 | CachedSurface(const SurfaceParams& params); |
| 416 | |||
| 417 | bool IsRegionValid(SurfaceInterval interval) const { | ||
| 418 | return (invalid_regions.find(interval) == invalid_regions.end()); | ||
| 419 | } | ||
| 420 | 264 | ||
| 421 | bool IsSurfaceFullyInvalid() const { | 265 | const OGLTexture& Texture() const { |
| 422 | return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); | 266 | return texture; |
| 423 | } | 267 | } |
| 424 | 268 | ||
| 425 | bool registered = false; | 269 | static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) { |
| 426 | SurfaceRegions invalid_regions; | 270 | if (format == SurfaceParams::PixelFormat::Invalid) |
| 427 | |||
| 428 | u64 fill_size = 0; /// Number of bytes to read from fill_data | ||
| 429 | std::array<u8, 4> fill_data; | ||
| 430 | |||
| 431 | OGLTexture texture; | ||
| 432 | |||
| 433 | static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { | ||
| 434 | if (format == PixelFormat::Invalid) | ||
| 435 | return 0; | 271 | return 0; |
| 436 | 272 | ||
| 437 | return SurfaceParams::GetFormatBpp(format) / CHAR_BIT; | 273 | return SurfaceParams::GetFormatBpp(format) / CHAR_BIT; |
| 438 | } | 274 | } |
| 439 | 275 | ||
| 440 | std::unique_ptr<u8[]> gl_buffer; | 276 | const SurfaceParams& GetSurfaceParams() const { |
| 441 | size_t gl_buffer_size = 0; | 277 | return params; |
| 278 | } | ||
| 442 | 279 | ||
| 443 | // Read/Write data in Switch memory to/from gl_buffer | 280 | // Read/Write data in Switch memory to/from gl_buffer |
| 444 | void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end); | 281 | void LoadGLBuffer(); |
| 445 | void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end); | 282 | void FlushGLBuffer(); |
| 446 | 283 | ||
| 447 | // Upload/Download data in gl_buffer in/to this surface's texture | 284 | // Upload/Download data in gl_buffer in/to this surface's texture |
| 448 | void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, | 285 | void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); |
| 449 | GLuint draw_fb_handle); | 286 | void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); |
| 450 | void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, | 287 | |
| 451 | GLuint draw_fb_handle); | 288 | private: |
| 289 | OGLTexture texture; | ||
| 290 | std::unique_ptr<u8[]> gl_buffer; | ||
| 291 | size_t gl_buffer_size; | ||
| 292 | SurfaceParams params; | ||
| 452 | }; | 293 | }; |
| 453 | 294 | ||
| 454 | class RasterizerCacheOpenGL : NonCopyable { | 295 | class RasterizerCacheOpenGL final : NonCopyable { |
| 455 | public: | 296 | public: |
| 456 | RasterizerCacheOpenGL(); | 297 | RasterizerCacheOpenGL(); |
| 457 | ~RasterizerCacheOpenGL(); | ||
| 458 | |||
| 459 | /// Blit one surface's texture to another | ||
| 460 | bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect, | ||
| 461 | const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect); | ||
| 462 | |||
| 463 | void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, | ||
| 464 | GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect); | ||
| 465 | 298 | ||
| 466 | /// Copy one surface's region to another | ||
| 467 | void CopySurface(const Surface& src_surface, const Surface& dst_surface, | ||
| 468 | SurfaceInterval copy_interval); | ||
| 469 | |||
| 470 | /// Load a texture from Switch memory to OpenGL and cache it (if not already cached) | ||
| 471 | Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, | ||
| 472 | bool load_if_create); | ||
| 473 | |||
| 474 | /// Tries to find a framebuffer GPU address based on the provided CPU address | ||
| 475 | boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const; | ||
| 476 | |||
| 477 | /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from | ||
| 478 | /// Switch memory to OpenGL and caches it (if not already cached) | ||
| 479 | SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, | ||
| 480 | bool load_if_create); | ||
| 481 | |||
| 482 | /// Get a surface based on the texture configuration | ||
| 483 | Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); | 299 | Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); |
| 484 | |||
| 485 | /// Get the color and depth surfaces based on the framebuffer configuration | ||
| 486 | SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, | 300 | SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, |
| 487 | const MathUtil::Rectangle<s32>& viewport); | 301 | const MathUtil::Rectangle<s32>& viewport); |
| 488 | 302 | void LoadSurface(const Surface& surface); | |
| 489 | /// Get a surface that matches the fill config | 303 | void FlushSurface(const Surface& surface); |
| 490 | Surface GetFillSurface(const void* config); | ||
| 491 | |||
| 492 | /// Get a surface that matches a "texture copy" display transfer config | ||
| 493 | SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); | ||
| 494 | |||
| 495 | /// Write any cached resources overlapping the region back to memory (if dirty) | ||
| 496 | void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr); | ||
| 497 | |||
| 498 | /// Mark region as being invalidated by region_owner (nullptr if Switch memory) | ||
| 499 | void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner); | ||
| 500 | |||
| 501 | /// Flush all cached resources tracked by this cache manager | ||
| 502 | void FlushAll(); | ||
| 503 | 304 | ||
| 504 | private: | 305 | private: |
| 505 | void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); | 306 | Surface GetSurface(const SurfaceParams& params); |
| 506 | |||
| 507 | /// Update surface's texture for given region when necessary | ||
| 508 | void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size); | ||
| 509 | |||
| 510 | /// Create a new surface | ||
| 511 | Surface CreateSurface(const SurfaceParams& params); | ||
| 512 | |||
| 513 | /// Register surface into the cache | ||
| 514 | void RegisterSurface(const Surface& surface); | ||
| 515 | |||
| 516 | /// Remove surface from the cache | ||
| 517 | void UnregisterSurface(const Surface& surface); | ||
| 518 | |||
| 519 | /// Increase/decrease the number of surface in pages touching the specified region | ||
| 520 | void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); | ||
| 521 | |||
| 522 | SurfaceCache surface_cache; | ||
| 523 | PageMap cached_pages; | ||
| 524 | SurfaceMap dirty_regions; | ||
| 525 | SurfaceSet remove_surfaces; | ||
| 526 | 307 | ||
| 308 | std::map<Tegra::GPUVAddr, Surface> surface_cache; | ||
| 527 | OGLFramebuffer read_framebuffer; | 309 | OGLFramebuffer read_framebuffer; |
| 528 | OGLFramebuffer draw_framebuffer; | 310 | OGLFramebuffer draw_framebuffer; |
| 529 | |||
| 530 | OGLVertexArray attributeless_vao; | ||
| 531 | OGLBuffer d24s8_abgr_buffer; | ||
| 532 | GLsizeiptr d24s8_abgr_buffer_size; | ||
| 533 | OGLProgram d24s8_abgr_shader; | ||
| 534 | GLint d24s8_abgr_tbo_size_u_id; | ||
| 535 | GLint d24s8_abgr_viewport_u_id; | ||
| 536 | }; | 311 | }; |
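The replacement cache introduced by this commit is intentionally simple: one surface per starting GPU address in a std::map, loaded when created and flushed on request. A minimal get-or-create sketch in the same spirit; SimpleSurfaceCache and SurfaceData are simplified stand-ins, not the real RasterizerCacheOpenGL/CachedSurface classes.

```cpp
// Get-or-create keyed by starting GPU address: exact hits are reused, misses
// create a new entry that would then be loaded from guest memory.
#include <cstdint>
#include <map>
#include <memory>

struct SurfaceData {
    std::uint64_t addr;
    std::uint32_t width;
    std::uint32_t height;
};

class SimpleSurfaceCache {
public:
    std::shared_ptr<SurfaceData> GetSurface(std::uint64_t addr, std::uint32_t width,
                                            std::uint32_t height) {
        if (addr == 0 || width * height == 0) {
            return nullptr;
        }
        const auto it = cache.find(addr);
        if (it != cache.end()) {
            return it->second; // exact address hit; reuse the cached surface
        }
        auto surface = std::make_shared<SurfaceData>(SurfaceData{addr, width, height});
        cache.emplace(addr, surface);
        // The real cache would now upload the guest texture data (LoadSurface).
        return surface;
    }

private:
    std::map<std::uint64_t, std::shared_ptr<SurfaceData>> cache;
};
```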