diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/host_shaders/opengl_copy_bgra.comp | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 20 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/util_shaders.cpp | 76 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/util_shaders.h | 22 |
5 files changed, 132 insertions, 2 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 970120acc..3494318ca 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -5,6 +5,7 @@ set(SHADER_FILES | |||
| 5 | convert_float_to_depth.frag | 5 | convert_float_to_depth.frag |
| 6 | full_screen_triangle.vert | 6 | full_screen_triangle.vert |
| 7 | opengl_copy_bc4.comp | 7 | opengl_copy_bc4.comp |
| 8 | opengl_copy_bgra.comp | ||
| 8 | opengl_present.frag | 9 | opengl_present.frag |
| 9 | opengl_present.vert | 10 | opengl_present.vert |
| 10 | pitch_unswizzle.comp | 11 | pitch_unswizzle.comp |
diff --git a/src/video_core/host_shaders/opengl_copy_bgra.comp b/src/video_core/host_shaders/opengl_copy_bgra.comp new file mode 100644 index 000000000..2571a4abf --- /dev/null +++ b/src/video_core/host_shaders/opengl_copy_bgra.comp | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 430 core | ||
| 6 | |||
// Each work group covers a 4x4 tile of texels; one invocation handles one texel.
layout (local_size_x = 4, local_size_y = 4) in;

layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input;
layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output;

// Copies a single texel from bgr_input to bgr_output, exchanging the red and blue channels.
void main() {
    ivec3 texel = ivec3(gl_GlobalInvocationID);
    vec4 source_color = imageLoad(bgr_input, texel);
    imageStore(bgr_output, texel, source_color.bgra);
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 12434db67..e028677e9 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -96,7 +96,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ | |||
| 96 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT | 96 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT |
| 97 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT | 97 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT |
| 98 | {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM | 98 | {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM |
| 99 | {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM | 99 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM |
| 100 | {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT | 100 | {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT |
| 101 | {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT | 101 | {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT |
| 102 | {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT | 102 | {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT |
| @@ -125,7 +125,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ | |||
| 125 | {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM | 125 | {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM |
| 126 | {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM | 126 | {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM |
| 127 | {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM | 127 | {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM |
| 128 | {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM | 128 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB |
| 129 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB | 129 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB |
| 130 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB | 130 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB |
| 131 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB | 131 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB |
| @@ -396,6 +396,17 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { | |||
| 396 | } | 396 | } |
| 397 | } | 397 | } |
| 398 | 398 | ||
| 399 | [[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) { | ||
| 400 | switch (format) { | ||
| 401 | case PixelFormat::B5G6R5_UNORM: | ||
| 402 | case PixelFormat::B8G8R8A8_UNORM: | ||
| 403 | case PixelFormat::B8G8R8A8_SRGB: | ||
| 404 | return true; | ||
| 405 | default: | ||
| 406 | return false; | ||
| 407 | } | ||
| 408 | } | ||
| 409 | |||
| 399 | } // Anonymous namespace | 410 | } // Anonymous namespace |
| 400 | 411 | ||
| 401 | ImageBufferMap::~ImageBufferMap() { | 412 | ImageBufferMap::~ImageBufferMap() { |
| @@ -512,6 +523,9 @@ bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { | |||
| 512 | if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { | 523 | if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { |
| 513 | return false; | 524 | return false; |
| 514 | } | 525 | } |
| 526 | if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { | ||
| 527 | return false; | ||
| 528 | } | ||
| 515 | return true; | 529 | return true; |
| 516 | } | 530 | } |
| 517 | 531 | ||
| @@ -520,6 +534,8 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, | |||
| 520 | if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { | 534 | if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { |
| 521 | ASSERT(src.info.type == ImageType::e3D); | 535 | ASSERT(src.info.type == ImageType::e3D); |
| 522 | util_shaders.CopyBC4(dst, src, copies); | 536 | util_shaders.CopyBC4(dst, src, copies); |
| 537 | } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { | ||
| 538 | util_shaders.CopyBGR(dst, src, copies); | ||
| 523 | } else { | 539 | } else { |
| 524 | UNREACHABLE(); | 540 | UNREACHABLE(); |
| 525 | } | 541 | } |
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 31ec68505..2fe4799bc 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" | 14 | #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" |
| 15 | #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" | 15 | #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" |
| 16 | #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | 16 | #include "video_core/host_shaders/opengl_copy_bc4_comp.h" |
| 17 | #include "video_core/host_shaders/opengl_copy_bgra_comp.h" | ||
| 17 | #include "video_core/host_shaders/pitch_unswizzle_comp.h" | 18 | #include "video_core/host_shaders/pitch_unswizzle_comp.h" |
| 18 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 19 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 19 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 20 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| @@ -48,6 +49,11 @@ OGLProgram MakeProgram(std::string_view source) { | |||
| 48 | return program; | 49 | return program; |
| 49 | } | 50 | } |
| 50 | 51 | ||
| 52 | size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { | ||
| 53 | return static_cast<size_t>(copy.extent.width * copy.extent.height * | ||
| 54 | copy.src_subresource.num_layers); | ||
| 55 | } | ||
| 56 | |||
| 51 | } // Anonymous namespace | 57 | } // Anonymous namespace |
| 52 | 58 | ||
| 53 | UtilShaders::UtilShaders(ProgramManager& program_manager_) | 59 | UtilShaders::UtilShaders(ProgramManager& program_manager_) |
| @@ -55,6 +61,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) | |||
| 55 | block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), | 61 | block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), |
| 56 | block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), | 62 | block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), |
| 57 | pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), | 63 | pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), |
| 64 | copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)), | ||
| 58 | copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { | 65 | copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { |
| 59 | const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); | 66 | const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); |
| 60 | swizzle_table_buffer.Create(); | 67 | swizzle_table_buffer.Create(); |
| @@ -205,6 +212,43 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im | |||
| 205 | program_manager.RestoreGuestCompute(); | 212 | program_manager.RestoreGuestCompute(); |
| 206 | } | 213 | } |
| 207 | 214 | ||
| 215 | void UtilShaders::CopyBGR(Image& dst_image, Image& src_image, | ||
| 216 | std::span<const VideoCommon::ImageCopy> copies) { | ||
| 217 | static constexpr GLuint BINDING_INPUT_IMAGE = 0; | ||
| 218 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; | ||
| 219 | static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; | ||
| 220 | const u32 bytes_per_block = BytesPerBlock(dst_image.info.format); | ||
| 221 | switch (bytes_per_block) { | ||
| 222 | case 2: | ||
| 223 | // BGR565 copy | ||
| 224 | for (const ImageCopy& copy : copies) { | ||
| 225 | ASSERT(copy.src_offset == zero_offset); | ||
| 226 | ASSERT(copy.dst_offset == zero_offset); | ||
| 227 | bgr_copy_pass.Execute(dst_image, src_image, copy); | ||
| 228 | } | ||
| 229 | break; | ||
| 230 | case 4: { | ||
| 231 | // BGRA8 copy | ||
| 232 | program_manager.BindHostCompute(copy_bgra_program.handle); | ||
| 233 | constexpr GLenum FORMAT = GL_RGBA8; | ||
| 234 | for (const ImageCopy& copy : copies) { | ||
| 235 | ASSERT(copy.src_offset == zero_offset); | ||
| 236 | ASSERT(copy.dst_offset == zero_offset); | ||
| 237 | glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), | ||
| 238 | copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT); | ||
| 239 | glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), | ||
| 240 | copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT); | ||
| 241 | glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); | ||
| 242 | } | ||
| 243 | program_manager.RestoreGuestCompute(); | ||
| 244 | break; | ||
| 245 | } | ||
| 246 | default: | ||
| 247 | UNREACHABLE(); | ||
| 248 | break; | ||
| 249 | } | ||
| 250 | } | ||
| 251 | |||
| 208 | GLenum StoreFormat(u32 bytes_per_block) { | 252 | GLenum StoreFormat(u32 bytes_per_block) { |
| 209 | switch (bytes_per_block) { | 253 | switch (bytes_per_block) { |
| 210 | case 1: | 254 | case 1: |
| @@ -222,4 +266,36 @@ GLenum StoreFormat(u32 bytes_per_block) { | |||
| 222 | return GL_R8UI; | 266 | return GL_R8UI; |
| 223 | } | 267 | } |
| 224 | 268 | ||
| 269 | void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image, | ||
| 270 | const ImageCopy& copy) { | ||
| 271 | if (CopyBufferCreationNeeded(copy)) { | ||
| 272 | CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565); | ||
| 273 | } | ||
| 274 | // Copy from source to PBO | ||
| 275 | glPixelStorei(GL_PACK_ALIGNMENT, 1); | ||
| 276 | glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); | ||
| 277 | glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle); | ||
| 278 | glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, | ||
| 279 | copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, | ||
| 280 | static_cast<GLsizei>(bgr16_pbo_size), nullptr); | ||
| 281 | |||
| 282 | // Copy from PBO to destination in reverse order | ||
| 283 | glPixelStorei(GL_UNPACK_ALIGNMENT, 1); | ||
| 284 | glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); | ||
| 285 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle); | ||
| 286 | glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, | ||
| 287 | copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, | ||
| 288 | nullptr); | ||
| 289 | } | ||
| 290 | |||
| 291 | bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) { | ||
| 292 | return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16); | ||
| 293 | } | ||
| 294 | |||
| 295 | void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) { | ||
| 296 | bgr16_pbo.Create(); | ||
| 297 | bgr16_pbo_size = NumPixelsInCopy(copy) * sizeof(u16); | ||
| 298 | glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY); | ||
| 299 | } | ||
| 300 | |||
| 225 | } // namespace OpenGL | 301 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 7b1d16b09..93b009743 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h | |||
| @@ -19,6 +19,22 @@ class ProgramManager; | |||
| 19 | 19 | ||
| 20 | struct ImageBufferMap; | 20 | struct ImageBufferMap; |
| 21 | 21 | ||
| 22 | class Bgr565CopyPass { | ||
| 23 | public: | ||
| 24 | Bgr565CopyPass() = default; | ||
| 25 | ~Bgr565CopyPass() = default; | ||
| 26 | |||
| 27 | void Execute(const Image& dst_image, const Image& src_image, | ||
| 28 | const VideoCommon::ImageCopy& copy); | ||
| 29 | |||
| 30 | private: | ||
| 31 | [[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy); | ||
| 32 | void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format); | ||
| 33 | |||
| 34 | OGLBuffer bgr16_pbo; | ||
| 35 | size_t bgr16_pbo_size{}; | ||
| 36 | }; | ||
| 37 | |||
| 22 | class UtilShaders { | 38 | class UtilShaders { |
| 23 | public: | 39 | public: |
| 24 | explicit UtilShaders(ProgramManager& program_manager); | 40 | explicit UtilShaders(ProgramManager& program_manager); |
| @@ -36,6 +52,9 @@ public: | |||
| 36 | void CopyBC4(Image& dst_image, Image& src_image, | 52 | void CopyBC4(Image& dst_image, Image& src_image, |
| 37 | std::span<const VideoCommon::ImageCopy> copies); | 53 | std::span<const VideoCommon::ImageCopy> copies); |
| 38 | 54 | ||
| 55 | void CopyBGR(Image& dst_image, Image& src_image, | ||
| 56 | std::span<const VideoCommon::ImageCopy> copies); | ||
| 57 | |||
| 39 | private: | 58 | private: |
| 40 | ProgramManager& program_manager; | 59 | ProgramManager& program_manager; |
| 41 | 60 | ||
| @@ -44,7 +63,10 @@ private: | |||
| 44 | OGLProgram block_linear_unswizzle_2d_program; | 63 | OGLProgram block_linear_unswizzle_2d_program; |
| 45 | OGLProgram block_linear_unswizzle_3d_program; | 64 | OGLProgram block_linear_unswizzle_3d_program; |
| 46 | OGLProgram pitch_unswizzle_program; | 65 | OGLProgram pitch_unswizzle_program; |
| 66 | OGLProgram copy_bgra_program; | ||
| 47 | OGLProgram copy_bc4_program; | 67 | OGLProgram copy_bc4_program; |
| 68 | |||
| 69 | Bgr565CopyPass bgr_copy_pass; | ||
| 48 | }; | 70 | }; |
| 49 | 71 | ||
| 50 | GLenum StoreFormat(u32 bytes_per_block); | 72 | GLenum StoreFormat(u32 bytes_per_block); |