diff options
| -rw-r--r-- | src/video_core/texture_cache/decode_bc.cpp | 50 | ||||
| -rw-r--r-- | src/video_core/texture_cache/decode_bc.h | 2 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 16 |
3 files changed, 35 insertions, 33 deletions
diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp index 3e26474a3..a018c6df4 100644 --- a/src/video_core/texture_cache/decode_bc.cpp +++ b/src/video_core/texture_cache/decode_bc.cpp | |||
| @@ -60,66 +60,72 @@ u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) { | |||
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | template <auto decompress, PixelFormat pixel_format> | 62 | template <auto decompress, PixelFormat pixel_format> |
| 63 | void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent, | 63 | void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy, |
| 64 | bool is_signed = false) { | 64 | bool is_signed = false) { |
| 65 | const u32 out_bpp = ConvertedBytesPerBlock(pixel_format); | 65 | const u32 out_bpp = ConvertedBytesPerBlock(pixel_format); |
| 66 | const u32 block_width = std::min(extent.width, BLOCK_SIZE); | 66 | const u32 block_size = BlockSize(pixel_format); |
| 67 | const u32 block_height = std::min(extent.height, BLOCK_SIZE); | 67 | const u32 width = copy.image_extent.width; |
| 68 | const u32 pitch = extent.width * out_bpp; | 68 | const u32 height = copy.image_extent.height * copy.image_subresource.num_layers; |
| 69 | const u32 depth = copy.image_extent.depth; | ||
| 70 | const u32 block_width = std::min(width, BLOCK_SIZE); | ||
| 71 | const u32 block_height = std::min(height, BLOCK_SIZE); | ||
| 72 | const u32 pitch = width * out_bpp; | ||
| 69 | size_t input_offset = 0; | 73 | size_t input_offset = 0; |
| 70 | size_t output_offset = 0; | 74 | size_t output_offset = 0; |
| 71 | for (u32 slice = 0; slice < extent.depth; ++slice) { | 75 | for (u32 slice = 0; slice < depth; ++slice) { |
| 72 | for (u32 y = 0; y < extent.height; y += block_height) { | 76 | for (u32 y = 0; y < height; y += block_height) { |
| 73 | size_t row_offset = 0; | 77 | size_t src_offset = input_offset; |
| 74 | for (u32 x = 0; x < extent.width; | 78 | size_t dst_offset = output_offset; |
| 75 | x += block_width, row_offset += block_width * out_bpp) { | 79 | for (u32 x = 0; x < width; x += block_width) { |
| 76 | const u8* src = input.data() + input_offset; | 80 | const u8* src = input.data() + src_offset; |
| 77 | u8* const dst = output.data() + output_offset + row_offset; | 81 | u8* const dst = output.data() + dst_offset; |
| 78 | if constexpr (IsSigned(pixel_format)) { | 82 | if constexpr (IsSigned(pixel_format)) { |
| 79 | decompress(src, dst, x, y, extent.width, extent.height, is_signed); | 83 | decompress(src, dst, x, y, width, height, is_signed); |
| 80 | } else { | 84 | } else { |
| 81 | decompress(src, dst, x, y, extent.width, extent.height); | 85 | decompress(src, dst, x, y, width, height); |
| 82 | } | 86 | } |
| 83 | input_offset += BlockSize(pixel_format); | 87 | src_offset += block_size; |
| 88 | dst_offset += block_width * out_bpp; | ||
| 84 | } | 89 | } |
| 90 | input_offset += copy.buffer_row_length * block_size / block_width; | ||
| 85 | output_offset += block_height * pitch; | 91 | output_offset += block_height * pitch; |
| 86 | } | 92 | } |
| 87 | } | 93 | } |
| 88 | } | 94 | } |
| 89 | 95 | ||
| 90 | void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, | 96 | void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy, |
| 91 | VideoCore::Surface::PixelFormat pixel_format) { | 97 | VideoCore::Surface::PixelFormat pixel_format) { |
| 92 | switch (pixel_format) { | 98 | switch (pixel_format) { |
| 93 | case PixelFormat::BC1_RGBA_UNORM: | 99 | case PixelFormat::BC1_RGBA_UNORM: |
| 94 | case PixelFormat::BC1_RGBA_SRGB: | 100 | case PixelFormat::BC1_RGBA_SRGB: |
| 95 | DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent); | 101 | DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, copy); |
| 96 | break; | 102 | break; |
| 97 | case PixelFormat::BC2_UNORM: | 103 | case PixelFormat::BC2_UNORM: |
| 98 | case PixelFormat::BC2_SRGB: | 104 | case PixelFormat::BC2_SRGB: |
| 99 | DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent); | 105 | DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, copy); |
| 100 | break; | 106 | break; |
| 101 | case PixelFormat::BC3_UNORM: | 107 | case PixelFormat::BC3_UNORM: |
| 102 | case PixelFormat::BC3_SRGB: | 108 | case PixelFormat::BC3_SRGB: |
| 103 | DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent); | 109 | DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, copy); |
| 104 | break; | 110 | break; |
| 105 | case PixelFormat::BC4_SNORM: | 111 | case PixelFormat::BC4_SNORM: |
| 106 | case PixelFormat::BC4_UNORM: | 112 | case PixelFormat::BC4_UNORM: |
| 107 | DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>( | 113 | DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>( |
| 108 | input, output, extent, pixel_format == PixelFormat::BC4_SNORM); | 114 | input, output, copy, pixel_format == PixelFormat::BC4_SNORM); |
| 109 | break; | 115 | break; |
| 110 | case PixelFormat::BC5_SNORM: | 116 | case PixelFormat::BC5_SNORM: |
| 111 | case PixelFormat::BC5_UNORM: | 117 | case PixelFormat::BC5_UNORM: |
| 112 | DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>( | 118 | DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>( |
| 113 | input, output, extent, pixel_format == PixelFormat::BC5_SNORM); | 119 | input, output, copy, pixel_format == PixelFormat::BC5_SNORM); |
| 114 | break; | 120 | break; |
| 115 | case PixelFormat::BC6H_SFLOAT: | 121 | case PixelFormat::BC6H_SFLOAT: |
| 116 | case PixelFormat::BC6H_UFLOAT: | 122 | case PixelFormat::BC6H_UFLOAT: |
| 117 | DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>( | 123 | DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>( |
| 118 | input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT); | 124 | input, output, copy, pixel_format == PixelFormat::BC6H_SFLOAT); |
| 119 | break; | 125 | break; |
| 120 | case PixelFormat::BC7_SRGB: | 126 | case PixelFormat::BC7_SRGB: |
| 121 | case PixelFormat::BC7_UNORM: | 127 | case PixelFormat::BC7_UNORM: |
| 122 | DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent); | 128 | DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, copy); |
| 123 | break; | 129 | break; |
| 124 | default: | 130 | default: |
| 125 | LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format); | 131 | LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format); |
diff --git a/src/video_core/texture_cache/decode_bc.h b/src/video_core/texture_cache/decode_bc.h index 41d1ec0a3..4e3b9b8ac 100644 --- a/src/video_core/texture_cache/decode_bc.h +++ b/src/video_core/texture_cache/decode_bc.h | |||
| @@ -13,7 +13,7 @@ namespace VideoCommon { | |||
| 13 | 13 | ||
| 14 | [[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format); | 14 | [[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format); |
| 15 | 15 | ||
| 16 | void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, | 16 | void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy, |
| 17 | VideoCore::Surface::PixelFormat pixel_format); | 17 | VideoCore::Surface::PixelFormat pixel_format); |
| 18 | 18 | ||
| 19 | } // namespace VideoCommon | 19 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 15596c925..fcf70068e 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -837,6 +837,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory | |||
| 837 | std::span<u8> output) { | 837 | std::span<u8> output) { |
| 838 | const size_t guest_size_bytes = input.size_bytes(); | 838 | const size_t guest_size_bytes = input.size_bytes(); |
| 839 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | 839 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); |
| 840 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 840 | const Extent3D size = info.size; | 841 | const Extent3D size = info.size; |
| 841 | 842 | ||
| 842 | if (info.type == ImageType::Linear) { | 843 | if (info.type == ImageType::Linear) { |
| @@ -847,7 +848,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory | |||
| 847 | return {{ | 848 | return {{ |
| 848 | .buffer_offset = 0, | 849 | .buffer_offset = 0, |
| 849 | .buffer_size = guest_size_bytes, | 850 | .buffer_size = guest_size_bytes, |
| 850 | .buffer_row_length = info.pitch >> bpp_log2, | 851 | .buffer_row_length = info.pitch * tile_size.width >> bpp_log2, |
| 851 | .buffer_image_height = size.height, | 852 | .buffer_image_height = size.height, |
| 852 | .image_subresource = | 853 | .image_subresource = |
| 853 | { | 854 | { |
| @@ -862,7 +863,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory | |||
| 862 | const LevelInfo level_info = MakeLevelInfo(info); | 863 | const LevelInfo level_info = MakeLevelInfo(info); |
| 863 | const s32 num_layers = info.resources.layers; | 864 | const s32 num_layers = info.resources.layers; |
| 864 | const s32 num_levels = info.resources.levels; | 865 | const s32 num_levels = info.resources.levels; |
| 865 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 866 | const std::array level_sizes = CalculateLevelSizes(level_info, num_levels); | 866 | const std::array level_sizes = CalculateLevelSizes(level_info, num_levels); |
| 867 | const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing); | 867 | const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing); |
| 868 | const u32 layer_size = CalculateLevelBytes(level_sizes, num_levels); | 868 | const u32 layer_size = CalculateLevelBytes(level_sizes, num_levels); |
| @@ -926,8 +926,6 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 926 | 926 | ||
| 927 | const auto input_offset = input.subspan(copy.buffer_offset); | 927 | const auto input_offset = input.subspan(copy.buffer_offset); |
| 928 | copy.buffer_offset = output_offset; | 928 | copy.buffer_offset = output_offset; |
| 929 | copy.buffer_row_length = mip_size.width; | ||
| 930 | copy.buffer_image_height = mip_size.height; | ||
| 931 | 929 | ||
| 932 | const auto recompression_setting = Settings::values.astc_recompression.GetValue(); | 930 | const auto recompression_setting = Settings::values.astc_recompression.GetValue(); |
| 933 | const bool astc = IsPixelFormatASTC(info.format); | 931 | const bool astc = IsPixelFormatASTC(info.format); |
| @@ -972,16 +970,14 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 972 | bpp_div; | 970 | bpp_div; |
| 973 | output_offset += static_cast<u32>(copy.buffer_size); | 971 | output_offset += static_cast<u32>(copy.buffer_size); |
| 974 | } else { | 972 | } else { |
| 975 | const Extent3D image_extent{ | 973 | DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format); |
| 976 | .width = copy.image_extent.width, | ||
| 977 | .height = copy.image_extent.height * copy.image_subresource.num_layers, | ||
| 978 | .depth = copy.image_extent.depth, | ||
| 979 | }; | ||
| 980 | DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format); | ||
| 981 | output_offset += copy.image_extent.width * copy.image_extent.height * | 974 | output_offset += copy.image_extent.width * copy.image_extent.height * |
| 982 | copy.image_subresource.num_layers * | 975 | copy.image_subresource.num_layers * |
| 983 | ConvertedBytesPerBlock(info.format); | 976 | ConvertedBytesPerBlock(info.format); |
| 984 | } | 977 | } |
| 978 | |||
| 979 | copy.buffer_row_length = mip_size.width; | ||
| 980 | copy.buffer_image_height = mip_size.height; | ||
| 985 | } | 981 | } |
| 986 | } | 982 | } |
| 987 | 983 | ||