summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/engines/fermi_2d.cpp50
-rw-r--r--src/video_core/engines/fermi_2d.h8
-rw-r--r--src/video_core/gpu.cpp2
-rw-r--r--src/video_core/rasterizer_interface.h11
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp63
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h9
-rw-r--r--src/yuzu/main.cpp2
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.cpp2
10 files changed, 121 insertions, 40 deletions
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index ea1555c5d..912e785b9 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -4,11 +4,13 @@
4 4
5#include "core/memory.h" 5#include "core/memory.h"
6#include "video_core/engines/fermi_2d.h" 6#include "video_core/engines/fermi_2d.h"
7#include "video_core/rasterizer_interface.h"
7#include "video_core/textures/decoders.h" 8#include "video_core/textures/decoders.h"
8 9
9namespace Tegra::Engines { 10namespace Tegra::Engines {
10 11
11Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager) {} 12Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
13 : memory_manager(memory_manager), rasterizer{rasterizer} {}
12 14
13void Fermi2D::WriteReg(u32 method, u32 value) { 15void Fermi2D::WriteReg(u32 method, u32 value) {
14 ASSERT_MSG(method < Regs::NUM_REGS, 16 ASSERT_MSG(method < Regs::NUM_REGS,
@@ -44,27 +46,31 @@ void Fermi2D::HandleSurfaceCopy() {
44 u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); 46 u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format);
45 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); 47 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
46 48
47 if (regs.src.linear == regs.dst.linear) { 49 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
48 // If the input layout and the output layout are the same, just perform a raw copy. 50 // TODO(bunnei): The below implementation currently will not get hit, as
49 ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight()); 51 // AccelerateSurfaceCopy tries to always copy and will always return success. This should be
50 Memory::CopyBlock(dest_cpu, source_cpu, 52 // changed once we properly support flushing.
51 src_bytes_per_pixel * regs.dst.width * regs.dst.height); 53
52 return; 54 if (regs.src.linear == regs.dst.linear) {
53 } 55 // If the input layout and the output layout are the same, just perform a raw copy.
54 56 ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
55 u8* src_buffer = Memory::GetPointer(source_cpu); 57 Memory::CopyBlock(dest_cpu, source_cpu,
56 u8* dst_buffer = Memory::GetPointer(dest_cpu); 58 src_bytes_per_pixel * regs.dst.width * regs.dst.height);
57 59 return;
58 if (!regs.src.linear && regs.dst.linear) { 60 }
59 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 61 u8* src_buffer = Memory::GetPointer(source_cpu);
60 Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, 62 u8* dst_buffer = Memory::GetPointer(dest_cpu);
61 dst_bytes_per_pixel, src_buffer, dst_buffer, true, 63 if (!regs.src.linear && regs.dst.linear) {
62 regs.src.BlockHeight()); 64 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
63 } else { 65 Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel,
64 // If the input is linear and the output is tiled, swizzle the input and copy it over. 66 dst_bytes_per_pixel, src_buffer, dst_buffer, true,
65 Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, 67 regs.src.BlockHeight());
66 dst_bytes_per_pixel, dst_buffer, src_buffer, false, 68 } else {
67 regs.dst.BlockHeight()); 69 // If the input is linear and the output is tiled, swizzle the input and copy it over.
70 Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel,
71 dst_bytes_per_pixel, dst_buffer, src_buffer, false,
72 regs.dst.BlockHeight());
73 }
68 } 74 }
69} 75}
70 76
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 021b83eaa..81d15c62a 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -12,6 +12,10 @@
12#include "video_core/gpu.h" 12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
14 14
15namespace VideoCore {
16class RasterizerInterface;
17}
18
15namespace Tegra::Engines { 19namespace Tegra::Engines {
16 20
17#define FERMI2D_REG_INDEX(field_name) \ 21#define FERMI2D_REG_INDEX(field_name) \
@@ -19,7 +23,7 @@ namespace Tegra::Engines {
19 23
20class Fermi2D final { 24class Fermi2D final {
21public: 25public:
22 explicit Fermi2D(MemoryManager& memory_manager); 26 explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
23 ~Fermi2D() = default; 27 ~Fermi2D() = default;
24 28
25 /// Write the value to the register identified by method. 29 /// Write the value to the register identified by method.
@@ -94,6 +98,8 @@ public:
94 MemoryManager& memory_manager; 98 MemoryManager& memory_manager;
95 99
96private: 100private:
101 VideoCore::RasterizerInterface& rasterizer;
102
97 /// Performs the copy from the source surface to the destination surface as configured in the 103 /// Performs the copy from the source surface to the destination surface as configured in the
98 /// registers. 104 /// registers.
99 void HandleSurfaceCopy(); 105 void HandleSurfaceCopy();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index baa8b63b7..9ba7e3533 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -25,7 +25,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
25GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { 25GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
26 memory_manager = std::make_unique<Tegra::MemoryManager>(); 26 memory_manager = std::make_unique<Tegra::MemoryManager>();
27 maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); 27 maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
28 fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager); 28 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
29 maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); 29 maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
30 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager); 30 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
31 kepler_memory = std::make_unique<Engines::KeplerMemory>(*memory_manager); 31 kepler_memory = std::make_unique<Engines::KeplerMemory>(*memory_manager);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index cd819d69f..06fc59dbe 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "video_core/engines/fermi_2d.h"
8#include "video_core/gpu.h" 9#include "video_core/gpu.h"
9#include "video_core/memory_manager.h" 10#include "video_core/memory_manager.h"
10 11
@@ -33,13 +34,9 @@ public:
33 /// and invalidated 34 /// and invalidated
34 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 35 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
35 36
36 /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 37 /// Attempt to use a faster method to perform a surface copy
37 virtual bool AccelerateDisplayTransfer(const void* config) { 38 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
38 return false; 39 const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
39 }
40
41 /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 1
42 virtual bool AccelerateTextureCopy(const void* config) {
43 return false; 40 return false;
44 } 41 }
45 42
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index edcd5e451..209bdf181 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -617,14 +617,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
617 InvalidateRegion(addr, size); 617 InvalidateRegion(addr, size);
618} 618}
619 619
620bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { 620bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
621 const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
621 MICROPROFILE_SCOPE(OpenGL_Blits); 622 MICROPROFILE_SCOPE(OpenGL_Blits);
622 UNREACHABLE(); 623 res_cache.FermiCopySurface(src, dst);
623 return true;
624}
625
626bool RasterizerOpenGL::AccelerateTextureCopy(const void* config) {
627 UNREACHABLE();
628 return true; 624 return true;
629} 625}
630 626
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index dc31a2dbc..0dab2018b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -52,8 +52,8 @@ public:
52 void FlushRegion(VAddr addr, u64 size) override; 52 void FlushRegion(VAddr addr, u64 size) override;
53 void InvalidateRegion(VAddr addr, u64 size) override; 53 void InvalidateRegion(VAddr addr, u64 size) override;
54 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 54 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
55 bool AccelerateDisplayTransfer(const void* config) override; 55 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
56 bool AccelerateTextureCopy(const void* config) override; 56 const Tegra::Engines::Fermi2D::Regs::Surface& dst) override;
57 bool AccelerateFill(const void* config) override; 57 bool AccelerateFill(const void* config) override;
58 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 58 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
59 u32 pixel_stride) override; 59 u32 pixel_stride) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index ce967c4d6..56ff83eff 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -143,6 +143,28 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
143 return params; 143 return params;
144} 144}
145 145
146/*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
147 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
148 SurfaceParams params{};
149 params.addr = TryGetCpuAddr(config.Address());
150 params.is_tiled = !config.linear;
151 params.block_height = params.is_tiled ? config.BlockHeight() : 0,
152 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
153 params.component_type = ComponentTypeFromRenderTarget(config.format);
154 params.type = GetFormatType(params.pixel_format);
155 params.width = config.width;
156 params.height = config.height;
157 params.unaligned_height = config.height;
158 params.target = SurfaceTarget::Texture2D;
159 params.depth = 1;
160 params.size_in_bytes_total = params.SizeInBytesTotal();
161 params.size_in_bytes_2d = params.SizeInBytes2D();
162 params.max_mip_level = 0;
163 params.rt = {};
164
165 return params;
166}
167
146static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ 168static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
147 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U 169 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
148 {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S 170 {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
@@ -559,6 +581,18 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
559 return true; 581 return true;
560} 582}
561 583
584static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) {
585 const auto& src_params{src_surface->GetSurfaceParams()};
586 const auto& dst_params{dst_surface->GetSurfaceParams()};
587
588 const u32 width{std::min(src_params.width, dst_params.width)};
589 const u32 height{std::min(src_params.height, dst_params.height)};
590
591 glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0,
592 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0,
593 0, 0, width, height, 1);
594}
595
562static void CopySurface(const Surface& src_surface, const Surface& dst_surface, 596static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
563 GLuint copy_pbo_handle, GLenum src_attachment = 0, 597 GLuint copy_pbo_handle, GLenum src_attachment = 0,
564 GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { 598 GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
@@ -1033,6 +1067,26 @@ Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) {
1033 return surface; 1067 return surface;
1034} 1068}
1035 1069
1070void RasterizerCacheOpenGL::FermiCopySurface(
1071 const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
1072 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) {
1073
1074 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
1075 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
1076
1077 ASSERT(src_params.width == dst_params.width);
1078 ASSERT(src_params.height == dst_params.height);
1079 ASSERT(src_params.pixel_format == dst_params.pixel_format);
1080 ASSERT(src_params.block_height == dst_params.block_height);
1081 ASSERT(src_params.is_tiled == dst_params.is_tiled);
1082 ASSERT(src_params.depth == dst_params.depth);
1083 ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces
1084 ASSERT(src_params.target == dst_params.target);
1085 ASSERT(src_params.rt.index == dst_params.rt.index);
1086
1087 FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false));
1088}
1089
1036Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, 1090Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1037 const SurfaceParams& new_params) { 1091 const SurfaceParams& new_params) {
1038 // Verify surface is compatible for blitting 1092 // Verify surface is compatible for blitting
@@ -1041,6 +1095,15 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1041 // Get a new surface with the new parameters, and blit the previous surface to it 1095 // Get a new surface with the new parameters, and blit the previous surface to it
1042 Surface new_surface{GetUncachedSurface(new_params)}; 1096 Surface new_surface{GetUncachedSurface(new_params)};
1043 1097
1098 // For compatible surfaces, we can just do fast glCopyImageSubData based copy
1099 if (old_params.target == new_params.target && old_params.type == new_params.type &&
1100 old_params.depth == new_params.depth && old_params.depth == 1 &&
1101 SurfaceParams::GetFormatBpp(old_params.pixel_format) ==
1102 SurfaceParams::GetFormatBpp(new_params.pixel_format)) {
1103 FastCopySurface(old_surface, new_surface);
1104 return new_surface;
1105 }
1106
1044 // If the format is the same, just do a framebuffer blit. This is significantly faster than 1107 // If the format is the same, just do a framebuffer blit. This is significantly faster than
1045 // using PBOs. This is also likely less accurate, as textures will be converted rather than 1108 // using PBOs. This is also likely less accurate, as textures will be converted rather than
1046 // reinterpreted. When use_accurate_framebuffers setting is enabled, perform a more accurate 1109 // reinterpreted. When use_accurate_framebuffers setting is enabled, perform a more accurate
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 49025a3fe..0b4940b3c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -13,6 +13,7 @@
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "common/hash.h" 14#include "common/hash.h"
15#include "common/math_util.h" 15#include "common/math_util.h"
16#include "video_core/engines/fermi_2d.h"
16#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
17#include "video_core/rasterizer_cache.h" 18#include "video_core/rasterizer_cache.h"
18#include "video_core/renderer_opengl/gl_resource_manager.h" 19#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -719,6 +720,10 @@ struct SurfaceParams {
719 Tegra::GPUVAddr zeta_address, 720 Tegra::GPUVAddr zeta_address,
720 Tegra::DepthFormat format); 721 Tegra::DepthFormat format);
721 722
723 /// Creates SurfaceParams for a Fermi2D surface copy
724 static SurfaceParams CreateForFermiCopySurface(
725 const Tegra::Engines::Fermi2D::Regs::Surface& config);
726
722 /// Checks if surfaces are compatible for caching 727 /// Checks if surfaces are compatible for caching
723 bool IsCompatibleSurface(const SurfaceParams& other) const { 728 bool IsCompatibleSurface(const SurfaceParams& other) const {
724 return std::tie(pixel_format, type, width, height, target, depth) == 729 return std::tie(pixel_format, type, width, height, target, depth) ==
@@ -837,6 +842,10 @@ public:
837 /// Tries to find a framebuffer based on the provided CPU address 842 /// Tries to find a framebuffer based on the provided CPU address
838 Surface TryFindFramebufferSurface(VAddr addr) const; 843 Surface TryFindFramebufferSurface(VAddr addr) const;
839 844
845 /// Copies the contents of one surface to another
846 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
847 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config);
848
840private: 849private:
841 void LoadSurface(const Surface& surface); 850 void LoadSurface(const Surface& surface);
842 Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); 851 Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ad62a82d0..e11833c5a 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -485,6 +485,8 @@ QStringList GMainWindow::GetUnsupportedGLExtensions() {
485 unsupported_ext.append("ARB_texture_storage"); 485 unsupported_ext.append("ARB_texture_storage");
486 if (!GLAD_GL_ARB_multi_bind) 486 if (!GLAD_GL_ARB_multi_bind)
487 unsupported_ext.append("ARB_multi_bind"); 487 unsupported_ext.append("ARB_multi_bind");
488 if (!GLAD_GL_ARB_copy_image)
489 unsupported_ext.append("ARB_copy_image");
488 490
489 // Extensions required to support some texture formats. 491 // Extensions required to support some texture formats.
490 if (!GLAD_GL_EXT_texture_compression_s3tc) 492 if (!GLAD_GL_EXT_texture_compression_s3tc)
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 0733301b2..155095095 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -98,6 +98,8 @@ bool EmuWindow_SDL2::SupportsRequiredGLExtensions() {
98 unsupported_ext.push_back("ARB_texture_storage"); 98 unsupported_ext.push_back("ARB_texture_storage");
99 if (!GLAD_GL_ARB_multi_bind) 99 if (!GLAD_GL_ARB_multi_bind)
100 unsupported_ext.push_back("ARB_multi_bind"); 100 unsupported_ext.push_back("ARB_multi_bind");
101 if (!GLAD_GL_ARB_copy_image)
102 unsupported_ext.push_back("ARB_copy_image");
101 103
102 // Extensions required to support some texture formats. 104 // Extensions required to support some texture formats.
103 if (!GLAD_GL_EXT_texture_compression_s3tc) 105 if (!GLAD_GL_EXT_texture_compression_s3tc)