summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Rodrigo Locatti, 2021-03-09 02:47:51 -0300
committer: GitHub, 2021-03-09 02:47:51 -0300
commit: daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71 (patch)
tree: aaa1cc96298e6e114cb72dfb517f7a000d2a467d /src
parent: Merge pull request #6021 from ReinUsesLisp/skip-cache-heuristic (diff)
parent: texture_cache: Blacklist BGRA8 copies and views on OpenGL (diff)
downloadyuzu-daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71.tar.gz
yuzu-daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71.tar.xz
yuzu-daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71.zip
Merge pull request #5891 from ameerj/bgra-ogl
renderer_opengl: Use compute shaders to swizzle BGR textures on copy
Diffstat (limited to 'src')
-rw-r--r--src/video_core/compatible_formats.cpp49
-rw-r--r--src/video_core/compatible_formats.h5
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt1
-rw-r--r--src/video_core/host_shaders/opengl_copy_bgra.comp15
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp20
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h5
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp76
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h22
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h5
-rw-r--r--src/video_core/texture_cache/image_base.cpp5
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp2
-rw-r--r--src/video_core/texture_cache/texture_cache.h15
-rw-r--r--src/video_core/texture_cache/util.cpp13
-rw-r--r--src/video_core/texture_cache/util.h9
14 files changed, 212 insertions, 30 deletions
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index acf2668dc..8317d0636 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -48,6 +48,15 @@ constexpr std::array VIEW_CLASS_32_BITS{
48 PixelFormat::A2B10G10R10_UINT, 48 PixelFormat::A2B10G10R10_UINT,
49}; 49};
50 50
51constexpr std::array VIEW_CLASS_32_BITS_NO_BGR{
52 PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
53 PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
54 PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
55 PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM,
56 PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::A8B8G8R8_UINT,
57 PixelFormat::A8B8G8R8_SINT, PixelFormat::A2B10G10R10_UINT,
58};
59
51// TODO: How should we handle 24 bits? 60// TODO: How should we handle 24 bits?
52 61
53constexpr std::array VIEW_CLASS_16_BITS{ 62constexpr std::array VIEW_CLASS_16_BITS{
@@ -205,7 +214,6 @@ constexpr Table MakeViewTable() {
205 EnableRange(view, VIEW_CLASS_128_BITS); 214 EnableRange(view, VIEW_CLASS_128_BITS);
206 EnableRange(view, VIEW_CLASS_96_BITS); 215 EnableRange(view, VIEW_CLASS_96_BITS);
207 EnableRange(view, VIEW_CLASS_64_BITS); 216 EnableRange(view, VIEW_CLASS_64_BITS);
208 EnableRange(view, VIEW_CLASS_32_BITS);
209 EnableRange(view, VIEW_CLASS_16_BITS); 217 EnableRange(view, VIEW_CLASS_16_BITS);
210 EnableRange(view, VIEW_CLASS_8_BITS); 218 EnableRange(view, VIEW_CLASS_8_BITS);
211 EnableRange(view, VIEW_CLASS_RGTC1_RED); 219 EnableRange(view, VIEW_CLASS_RGTC1_RED);
@@ -231,20 +239,47 @@ constexpr Table MakeCopyTable() {
231 EnableRange(copy, COPY_CLASS_64_BITS); 239 EnableRange(copy, COPY_CLASS_64_BITS);
232 return copy; 240 return copy;
233} 241}
242
243constexpr Table MakeNativeBgrViewTable() {
244 Table copy = MakeViewTable();
245 EnableRange(copy, VIEW_CLASS_32_BITS);
246 return copy;
247}
248
249constexpr Table MakeNonNativeBgrViewTable() {
250 Table copy = MakeViewTable();
251 EnableRange(copy, VIEW_CLASS_32_BITS_NO_BGR);
252 return copy;
253}
254
255constexpr Table MakeNativeBgrCopyTable() {
256 Table copy = MakeCopyTable();
257 EnableRange(copy, VIEW_CLASS_32_BITS);
258 return copy;
259}
260
261constexpr Table MakeNonNativeBgrCopyTable() {
262 Table copy = MakeCopyTable();
263 EnableRange(copy, VIEW_CLASS_32_BITS);
264 return copy;
265}
234} // Anonymous namespace 266} // Anonymous namespace
235 267
236bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views) { 268bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views,
269 bool native_bgr) {
237 if (broken_views) { 270 if (broken_views) {
238 // If format views are broken, only accept formats that are identical. 271 // If format views are broken, only accept formats that are identical.
239 return format_a == format_b; 272 return format_a == format_b;
240 } 273 }
241 static constexpr Table TABLE = MakeViewTable(); 274 static constexpr Table BGR_TABLE = MakeNativeBgrViewTable();
242 return IsSupported(TABLE, format_a, format_b); 275 static constexpr Table NO_BGR_TABLE = MakeNonNativeBgrViewTable();
276 return IsSupported(native_bgr ? BGR_TABLE : NO_BGR_TABLE, format_a, format_b);
243} 277}
244 278
245bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) { 279bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b, bool native_bgr) {
246 static constexpr Table TABLE = MakeCopyTable(); 280 static constexpr Table BGR_TABLE = MakeNativeBgrCopyTable();
247 return IsSupported(TABLE, format_a, format_b); 281 static constexpr Table NO_BGR_TABLE = MakeNonNativeBgrCopyTable();
282 return IsSupported(native_bgr ? BGR_TABLE : NO_BGR_TABLE, format_a, format_b);
248} 283}
249 284
250} // namespace VideoCore::Surface 285} // namespace VideoCore::Surface
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
index 9a0522988..55745e042 100644
--- a/src/video_core/compatible_formats.h
+++ b/src/video_core/compatible_formats.h
@@ -8,8 +8,9 @@
8 8
9namespace VideoCore::Surface { 9namespace VideoCore::Surface {
10 10
11bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views); 11bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views,
12 bool native_bgr);
12 13
13bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b); 14bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b, bool native_bgr);
14 15
15} // namespace VideoCore::Surface 16} // namespace VideoCore::Surface
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 970120acc..3494318ca 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -5,6 +5,7 @@ set(SHADER_FILES
5 convert_float_to_depth.frag 5 convert_float_to_depth.frag
6 full_screen_triangle.vert 6 full_screen_triangle.vert
7 opengl_copy_bc4.comp 7 opengl_copy_bc4.comp
8 opengl_copy_bgra.comp
8 opengl_present.frag 9 opengl_present.frag
9 opengl_present.vert 10 opengl_present.vert
10 pitch_unswizzle.comp 11 pitch_unswizzle.comp
diff --git a/src/video_core/host_shaders/opengl_copy_bgra.comp b/src/video_core/host_shaders/opengl_copy_bgra.comp
new file mode 100644
index 000000000..2571a4abf
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_copy_bgra.comp
@@ -0,0 +1,15 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 430 core
6
7layout (local_size_x = 4, local_size_y = 4) in;
8
9layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input;
10layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output;
11
void main() {
    // One invocation per texel; the invocation ID is used directly as the
    // (x, y, layer) coordinate of both array images.
    ivec3 texel = ivec3(gl_GlobalInvocationID);
    // Write the texel back with its red and blue channels exchanged.
    vec4 swizzled = imageLoad(bgr_input, texel).bgra;
    imageStore(bgr_output, texel, swizzled);
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 12434db67..e028677e9 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -96,7 +96,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
96 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT 96 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
97 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT 97 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
98 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM 98 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
99 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM 99 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
100 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT 100 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
101 {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT 101 {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
102 {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT 102 {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
@@ -125,7 +125,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
125 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM 125 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
126 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM 126 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
127 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM 127 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
128 {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM 128 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
129 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB 129 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
130 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB 130 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
131 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB 131 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
@@ -396,6 +396,17 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
396 } 396 }
397} 397}
398 398
399[[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) {
400 switch (format) {
401 case PixelFormat::B5G6R5_UNORM:
402 case PixelFormat::B8G8R8A8_UNORM:
403 case PixelFormat::B8G8R8A8_SRGB:
404 return true;
405 default:
406 return false;
407 }
408}
409
399} // Anonymous namespace 410} // Anonymous namespace
400 411
401ImageBufferMap::~ImageBufferMap() { 412ImageBufferMap::~ImageBufferMap() {
@@ -512,6 +523,9 @@ bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) {
512 if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { 523 if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
513 return false; 524 return false;
514 } 525 }
526 if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) {
527 return false;
528 }
515 return true; 529 return true;
516} 530}
517 531
@@ -520,6 +534,8 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
520 if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { 534 if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
521 ASSERT(src.info.type == ImageType::e3D); 535 ASSERT(src.info.type == ImageType::e3D);
522 util_shaders.CopyBC4(dst, src, copies); 536 util_shaders.CopyBC4(dst, src, copies);
537 } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) {
538 util_shaders.CopyBGR(dst, src, copies);
523 } else { 539 } else {
524 UNREACHABLE(); 540 UNREACHABLE();
525 } 541 }
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index a6172f009..3fbaa102f 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -86,6 +86,11 @@ public:
86 86
87 FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; 87 FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
88 88
/// Reports whether this runtime supports BGR formats natively.
/// The OpenGL runtime always answers false.
89 bool HasNativeBgr() const noexcept {
90 // OpenGL does not have native support for the BGR internal format
91 return false;
92 }
93
89 bool HasBrokenTextureViewFormats() const noexcept { 94 bool HasBrokenTextureViewFormats() const noexcept {
90 return has_broken_texture_view_formats; 95 return has_broken_texture_view_formats;
91 } 96 }
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 31ec68505..2fe4799bc 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -14,6 +14,7 @@
14#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" 14#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
15#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" 15#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
16#include "video_core/host_shaders/opengl_copy_bc4_comp.h" 16#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
17#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
17#include "video_core/host_shaders/pitch_unswizzle_comp.h" 18#include "video_core/host_shaders/pitch_unswizzle_comp.h"
18#include "video_core/renderer_opengl/gl_resource_manager.h" 19#include "video_core/renderer_opengl/gl_resource_manager.h"
19#include "video_core/renderer_opengl/gl_shader_manager.h" 20#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -48,6 +49,11 @@ OGLProgram MakeProgram(std::string_view source) {
48 return program; 49 return program;
49} 50}
50 51
52size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
53 return static_cast<size_t>(copy.extent.width * copy.extent.height *
54 copy.src_subresource.num_layers);
55}
56
51} // Anonymous namespace 57} // Anonymous namespace
52 58
53UtilShaders::UtilShaders(ProgramManager& program_manager_) 59UtilShaders::UtilShaders(ProgramManager& program_manager_)
@@ -55,6 +61,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
55 block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), 61 block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
56 block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), 62 block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
57 pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), 63 pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
64 copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)),
58 copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { 65 copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
59 const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); 66 const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
60 swizzle_table_buffer.Create(); 67 swizzle_table_buffer.Create();
@@ -205,6 +212,43 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
205 program_manager.RestoreGuestCompute(); 212 program_manager.RestoreGuestCompute();
206} 213}
207 214
215void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
216 std::span<const VideoCommon::ImageCopy> copies) {
217 static constexpr GLuint BINDING_INPUT_IMAGE = 0;
218 static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
219 static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};
220 const u32 bytes_per_block = BytesPerBlock(dst_image.info.format);
221 switch (bytes_per_block) {
222 case 2:
223 // BGR565 copy
224 for (const ImageCopy& copy : copies) {
225 ASSERT(copy.src_offset == zero_offset);
226 ASSERT(copy.dst_offset == zero_offset);
227 bgr_copy_pass.Execute(dst_image, src_image, copy);
228 }
229 break;
230 case 4: {
231 // BGRA8 copy
232 program_manager.BindHostCompute(copy_bgra_program.handle);
233 constexpr GLenum FORMAT = GL_RGBA8;
234 for (const ImageCopy& copy : copies) {
235 ASSERT(copy.src_offset == zero_offset);
236 ASSERT(copy.dst_offset == zero_offset);
237 glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
238 copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT);
239 glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
240 copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT);
241 glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
242 }
243 program_manager.RestoreGuestCompute();
244 break;
245 }
246 default:
247 UNREACHABLE();
248 break;
249 }
250}
251
208GLenum StoreFormat(u32 bytes_per_block) { 252GLenum StoreFormat(u32 bytes_per_block) {
209 switch (bytes_per_block) { 253 switch (bytes_per_block) {
210 case 1: 254 case 1:
@@ -222,4 +266,36 @@ GLenum StoreFormat(u32 bytes_per_block) {
222 return GL_R8UI; 266 return GL_R8UI;
223} 267}
224 268
269void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image,
270 const ImageCopy& copy) {
271 if (CopyBufferCreationNeeded(copy)) {
272 CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565);
273 }
274 // Copy from source to PBO
275 glPixelStorei(GL_PACK_ALIGNMENT, 1);
276 glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width);
277 glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle);
278 glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
279 copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
280 static_cast<GLsizei>(bgr16_pbo_size), nullptr);
281
282 // Copy from PBO to destination in reverse order
283 glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
284 glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width);
285 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle);
286 glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
287 copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV,
288 nullptr);
289}
290
291bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) {
292 return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16);
293}
294
295void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) {
296 bgr16_pbo.Create();
297 bgr16_pbo_size = NumPixelsInCopy(copy) * sizeof(u16);
298 glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY);
299}
300
225} // namespace OpenGL 301} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 7b1d16b09..93b009743 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -19,6 +19,22 @@ class ProgramManager;
19 19
20struct ImageBufferMap; 20struct ImageBufferMap;
21 21
/// Helper that copies 16-bit 5:6:5 images through a pixel buffer object,
/// reversing the component order in the process. It owns the staging buffer
/// and remembers its size so the buffer is only reallocated when a copy
/// does not fit.
22class Bgr565CopyPass {
23public:
24 Bgr565CopyPass() = default;
25 ~Bgr565CopyPass() = default;
26
    /// Performs one copy from src_image to dst_image.
27 void Execute(const Image& dst_image, const Image& src_image,
28 const VideoCommon::ImageCopy& copy);
29
30private:
    /// Returns true when the staging buffer is too small for the given copy.
31 [[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy);
    /// Allocates staging storage sized for the given copy.
32 void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format);
33
    // Staging pixel buffer object and its current size in bytes (0 until first allocated).
34 OGLBuffer bgr16_pbo;
35 size_t bgr16_pbo_size{};
36};
37
22class UtilShaders { 38class UtilShaders {
23public: 39public:
24 explicit UtilShaders(ProgramManager& program_manager); 40 explicit UtilShaders(ProgramManager& program_manager);
@@ -36,6 +52,9 @@ public:
36 void CopyBC4(Image& dst_image, Image& src_image, 52 void CopyBC4(Image& dst_image, Image& src_image,
37 std::span<const VideoCommon::ImageCopy> copies); 53 std::span<const VideoCommon::ImageCopy> copies);
38 54
55 void CopyBGR(Image& dst_image, Image& src_image,
56 std::span<const VideoCommon::ImageCopy> copies);
57
39private: 58private:
40 ProgramManager& program_manager; 59 ProgramManager& program_manager;
41 60
@@ -44,7 +63,10 @@ private:
44 OGLProgram block_linear_unswizzle_2d_program; 63 OGLProgram block_linear_unswizzle_2d_program;
45 OGLProgram block_linear_unswizzle_3d_program; 64 OGLProgram block_linear_unswizzle_3d_program;
46 OGLProgram pitch_unswizzle_program; 65 OGLProgram pitch_unswizzle_program;
66 OGLProgram copy_bgra_program;
47 OGLProgram copy_bc4_program; 67 OGLProgram copy_bc4_program;
68
69 Bgr565CopyPass bgr_copy_pass;
48}; 70};
49 71
50GLenum StoreFormat(u32 bytes_per_block); 72GLenum StoreFormat(u32 bytes_per_block);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index b08c23459..3aee27ce0 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -93,6 +93,11 @@ struct TextureCacheRuntime {
93 // No known Vulkan driver has broken image views 93 // No known Vulkan driver has broken image views
94 return false; 94 return false;
95 } 95 }
96
/// Reports whether this runtime supports BGR formats natively.
/// The Vulkan runtime always answers true.
97 bool HasNativeBgr() const noexcept {
98 // All known Vulkan drivers can natively handle BGR textures
99 return true;
100 }
96}; 101};
97 102
98class Image : public VideoCommon::ImageBase { 103class Image : public VideoCommon::ImageBase {
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 959b3f115..9914926b3 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -120,9 +120,10 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
120 if (lhs.info.type == ImageType::Linear) { 120 if (lhs.info.type == ImageType::Linear) {
121 base = SubresourceBase{.level = 0, .layer = 0}; 121 base = SubresourceBase{.level = 0, .layer = 0};
122 } else { 122 } else {
123 // We are passing relaxed formats as an option, having broken views or not won't matter 123 // We are passing relaxed formats as an option, having broken views/bgr or not won't matter
124 static constexpr bool broken_views = false; 124 static constexpr bool broken_views = false;
125 base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views); 125 static constexpr bool native_bgr = true;
126 base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views, native_bgr);
126 } 127 }
127 if (!base) { 128 if (!base) {
128 LOG_ERROR(HW_GPU, "Image alias should have been flipped"); 129 LOG_ERROR(HW_GPU, "Image alias should have been flipped");
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index 18f72e508..f89a40b4c 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -24,7 +24,7 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
24 .height = std::max(image_info.size.height >> range.base.level, 1u), 24 .height = std::max(image_info.size.height >> range.base.level, 1u),
25 .depth = std::max(image_info.size.depth >> range.base.level, 1u), 25 .depth = std::max(image_info.size.depth >> range.base.level, 1u),
26 } { 26 } {
27 ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false), 27 ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false, true),
28 "Image view format {} is incompatible with image format {}", info.format, 28 "Image view format {} is incompatible with image format {}", info.format,
29 image_info.format); 29 image_info.format);
30 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); 30 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index b1da69971..98e33c3a0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -876,6 +876,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
876 return ImageId{}; 876 return ImageId{};
877 } 877 }
878 const bool broken_views = runtime.HasBrokenTextureViewFormats(); 878 const bool broken_views = runtime.HasBrokenTextureViewFormats();
879 const bool native_bgr = runtime.HasNativeBgr();
879 ImageId image_id; 880 ImageId image_id;
880 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { 881 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
881 if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { 882 if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
@@ -885,11 +886,12 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
885 if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && 886 if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
886 existing.pitch == info.pitch && 887 existing.pitch == info.pitch &&
887 IsPitchLinearSameSize(existing, info, strict_size) && 888 IsPitchLinearSameSize(existing, info, strict_size) &&
888 IsViewCompatible(existing.format, info.format, broken_views)) { 889 IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) {
889 image_id = existing_image_id; 890 image_id = existing_image_id;
890 return true; 891 return true;
891 } 892 }
892 } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) { 893 } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views,
894 native_bgr)) {
893 image_id = existing_image_id; 895 image_id = existing_image_id;
894 return true; 896 return true;
895 } 897 }
@@ -920,6 +922,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
920 ImageInfo new_info = info; 922 ImageInfo new_info = info;
921 const size_t size_bytes = CalculateGuestSizeInBytes(new_info); 923 const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
922 const bool broken_views = runtime.HasBrokenTextureViewFormats(); 924 const bool broken_views = runtime.HasBrokenTextureViewFormats();
925 const bool native_bgr = runtime.HasNativeBgr();
923 std::vector<ImageId> overlap_ids; 926 std::vector<ImageId> overlap_ids;
924 std::vector<ImageId> left_aliased_ids; 927 std::vector<ImageId> left_aliased_ids;
925 std::vector<ImageId> right_aliased_ids; 928 std::vector<ImageId> right_aliased_ids;
@@ -935,8 +938,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
935 return; 938 return;
936 } 939 }
937 static constexpr bool strict_size = true; 940 static constexpr bool strict_size = true;
938 const std::optional<OverlapResult> solution = 941 const std::optional<OverlapResult> solution = ResolveOverlap(
939 ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views); 942 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
940 if (solution) { 943 if (solution) {
941 gpu_addr = solution->gpu_addr; 944 gpu_addr = solution->gpu_addr;
942 cpu_addr = solution->cpu_addr; 945 cpu_addr = solution->cpu_addr;
@@ -946,10 +949,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
946 } 949 }
947 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; 950 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
948 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); 951 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
949 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) { 952 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
950 left_aliased_ids.push_back(overlap_id); 953 left_aliased_ids.push_back(overlap_id);
951 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, 954 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
952 broken_views)) { 955 broken_views, native_bgr)) {
953 right_aliased_ids.push_back(overlap_id); 956 right_aliased_ids.push_back(overlap_id);
954 } 957 }
955 }); 958 });
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index a0bc1f7b6..2c42d1449 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -1035,13 +1035,13 @@ bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool stri
1035 1035
1036std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, 1036std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
1037 VAddr cpu_addr, const ImageBase& overlap, 1037 VAddr cpu_addr, const ImageBase& overlap,
1038 bool strict_size, bool broken_views) { 1038 bool strict_size, bool broken_views, bool native_bgr) {
1039 ASSERT(new_info.type != ImageType::Linear); 1039 ASSERT(new_info.type != ImageType::Linear);
1040 ASSERT(overlap.info.type != ImageType::Linear); 1040 ASSERT(overlap.info.type != ImageType::Linear);
1041 if (!IsLayerStrideCompatible(new_info, overlap.info)) { 1041 if (!IsLayerStrideCompatible(new_info, overlap.info)) {
1042 return std::nullopt; 1042 return std::nullopt;
1043 } 1043 }
1044 if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) { 1044 if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views, native_bgr)) {
1045 return std::nullopt; 1045 return std::nullopt;
1046 } 1046 }
1047 if (gpu_addr == overlap.gpu_addr) { 1047 if (gpu_addr == overlap.gpu_addr) {
@@ -1085,14 +1085,14 @@ bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) {
1085 1085
1086std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image, 1086std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
1087 GPUVAddr candidate_addr, RelaxedOptions options, 1087 GPUVAddr candidate_addr, RelaxedOptions options,
1088 bool broken_views) { 1088 bool broken_views, bool native_bgr) {
1089 const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr); 1089 const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
1090 if (!base) { 1090 if (!base) {
1091 return std::nullopt; 1091 return std::nullopt;
1092 } 1092 }
1093 const ImageInfo& existing = image.info; 1093 const ImageInfo& existing = image.info;
1094 if (False(options & RelaxedOptions::Format)) { 1094 if (False(options & RelaxedOptions::Format)) {
1095 if (!IsViewCompatible(existing.format, candidate.format, broken_views)) { 1095 if (!IsViewCompatible(existing.format, candidate.format, broken_views, native_bgr)) {
1096 return std::nullopt; 1096 return std::nullopt;
1097 } 1097 }
1098 } 1098 }
@@ -1129,8 +1129,9 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const
1129} 1129}
1130 1130
1131bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, 1131bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
1132 RelaxedOptions options, bool broken_views) { 1132 RelaxedOptions options, bool broken_views, bool native_bgr) {
1133 return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value(); 1133 return FindSubresource(candidate, image, candidate_addr, options, broken_views, native_bgr)
1134 .has_value();
1134} 1135}
1135 1136
1136void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, 1137void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 52a9207d6..4d0072867 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -87,7 +87,8 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima
87[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, 87[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
88 GPUVAddr gpu_addr, VAddr cpu_addr, 88 GPUVAddr gpu_addr, VAddr cpu_addr,
89 const ImageBase& overlap, 89 const ImageBase& overlap,
90 bool strict_size, bool broken_views); 90 bool strict_size, bool broken_views,
91 bool native_bgr);
91 92
92[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); 93[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
93 94
@@ -95,11 +96,11 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima
95 const ImageBase& image, 96 const ImageBase& image,
96 GPUVAddr candidate_addr, 97 GPUVAddr candidate_addr,
97 RelaxedOptions options, 98 RelaxedOptions options,
98 bool broken_views); 99 bool broken_views, bool native_bgr);
99 100
100[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, 101[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
101 GPUVAddr candidate_addr, RelaxedOptions options, 102 GPUVAddr candidate_addr, RelaxedOptions options, bool broken_views,
102 bool broken_views); 103 bool native_bgr);
103 104
104void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, 105void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
105 const ImageBase* src); 106 const ImageBase* src);