summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/textures/decoders.cpp117
1 files changed, 66 insertions, 51 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 20ba6d4f6..3d5476e5d 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -13,47 +13,20 @@
13namespace Tegra::Texture { 13namespace Tegra::Texture {
14 14
15/** 15/**
16 * This table represents the internal swizzle of a gob,
17 * in format 16 bytes x 2 sector packing.
16 * Calculates the offset of an (x, y) position within a swizzled texture. 18 * Calculates the offset of an (x, y) position within a swizzled texture.
17 * Taken from the Tegra X1 TRM. 19 * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188
18 */ 20 */
19static u32 GetSwizzleOffset(u32 x, u32 y, u32 image_width, u32 bytes_per_pixel, u32 block_height) { 21template <std::size_t N, std::size_t M, u32 Align>
20 // Round up to the next gob
21 const u32 image_width_in_gobs{(image_width * bytes_per_pixel + 63) / 64};
22
23 u32 GOB_address = 0 + (y / (8 * block_height)) * 512 * block_height * image_width_in_gobs +
24 (x * bytes_per_pixel / 64) * 512 * block_height +
25 (y % (8 * block_height) / 8) * 512;
26 x *= bytes_per_pixel;
27 u32 address = GOB_address + ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
28 (y % 2) * 16 + (x % 16);
29
30 return address;
31}
32
33void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
34 u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height) {
35 u8* data_ptrs[2];
36 for (unsigned y = 0; y < height; ++y) {
37 for (unsigned x = 0; x < width; ++x) {
38 u32 swizzle_offset = GetSwizzleOffset(x, y, width, bytes_per_pixel, block_height);
39 u32 pixel_index = (x + y * width) * out_bytes_per_pixel;
40
41 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
42 data_ptrs[!unswizzle] = &unswizzled_data[pixel_index];
43
44 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
45 }
46 }
47}
48
49template <std::size_t N, std::size_t M>
50struct alignas(64) SwizzleTable { 22struct alignas(64) SwizzleTable {
23 static_assert(M * Align == 64, "Swizzle Table does not align to GOB");
51 constexpr SwizzleTable() { 24 constexpr SwizzleTable() {
52 for (u32 y = 0; y < N; ++y) { 25 for (u32 y = 0; y < N; ++y) {
53 for (u32 x = 0; x < M; ++x) { 26 for (u32 x = 0; x < M; ++x) {
54 const u32 x2 = x * 16; 27 const u32 x2 = x * Align;
55 values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + 28 values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
56 ((x2 % 32) / 16) * 32 + (y % 2) * 16); 29 ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16));
57 } 30 }
58 } 31 }
59 } 32 }
@@ -63,24 +36,60 @@ struct alignas(64) SwizzleTable {
63 std::array<std::array<u16, M>, N> values{}; 36 std::array<std::array<u16, M>, N> values{};
64}; 37};
65 38
66constexpr auto swizzle_table = SwizzleTable<8, 4>(); 39constexpr auto legacy_swizzle_table = SwizzleTable<8, 64, 1>();
40constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>();
67 41
68void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_data, 42static void LegacySwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
69 u8* unswizzled_data, bool unswizzle, u32 block_height) { 43 u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
44 u32 block_height) {
45 std::array<u8*, 2> data_ptrs;
46 const std::size_t stride = width * bytes_per_pixel;
47 const std::size_t gobs_in_x = 64;
48 const std::size_t gobs_in_y = 8;
49 const std::size_t gobs_size = gobs_in_x * gobs_in_y;
50 const std::size_t image_width_in_gobs{(stride + gobs_in_x - 1) / gobs_in_x};
51 for (std::size_t y = 0; y < height; ++y) {
52 const std::size_t gob_y_address =
53 (y / (gobs_in_y * block_height)) * gobs_size * block_height * image_width_in_gobs +
54 (y % (gobs_in_y * block_height) / gobs_in_y) * gobs_size;
55 const auto& table = legacy_swizzle_table[y % gobs_in_y];
56 for (std::size_t x = 0; x < width; ++x) {
57 const std::size_t gob_address =
58 gob_y_address + (x * bytes_per_pixel / gobs_in_x) * gobs_size * block_height;
59 const std::size_t x2 = x * bytes_per_pixel;
60 const std::size_t swizzle_offset = gob_address + table[x2 % gobs_in_x];
61 const std::size_t pixel_index = (x + y * width) * out_bytes_per_pixel;
62
63 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
64 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
65
66 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
67 }
68 }
69}
70
71static void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
72 u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
73 u32 block_height) {
70 std::array<u8*, 2> data_ptrs; 74 std::array<u8*, 2> data_ptrs;
71 const std::size_t stride{width * bytes_per_pixel}; 75 const std::size_t stride{width * bytes_per_pixel};
72 const std::size_t image_width_in_gobs{(stride + 63) / 64}; 76 const std::size_t gobs_in_x = 64;
77 const std::size_t gobs_in_y = 8;
78 const std::size_t gobs_size = gobs_in_x * gobs_in_y;
79 const std::size_t image_width_in_gobs{(stride + gobs_in_x - 1) / gobs_in_x};
73 const std::size_t copy_size{16}; 80 const std::size_t copy_size{16};
74 for (std::size_t y = 0; y < height; ++y) { 81 for (std::size_t y = 0; y < height; ++y) {
75 const std::size_t initial_gob = 82 const std::size_t initial_gob =
76 (y / (8 * block_height)) * 512 * block_height * image_width_in_gobs + 83 (y / (gobs_in_y * block_height)) * gobs_size * block_height * image_width_in_gobs +
77 (y % (8 * block_height) / 8) * 512; 84 (y % (gobs_in_y * block_height) / gobs_in_y) * gobs_size;
78 const std::size_t pixel_base{y * width * bytes_per_pixel}; 85 const std::size_t pixel_base{y * width * out_bytes_per_pixel};
79 const auto& table = swizzle_table[y % 8]; 86 const auto& table = fast_swizzle_table[y % gobs_in_y];
80 for (std::size_t xb = 0; xb < stride; xb += copy_size) { 87 for (std::size_t xb = 0; xb < stride; xb += copy_size) {
81 const std::size_t gob_address{initial_gob + (xb / 64) * 512 * block_height}; 88 const std::size_t gob_address{initial_gob +
89 (xb / gobs_in_x) * gobs_size * block_height};
82 const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]}; 90 const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]};
83 const std::size_t pixel_index{xb + pixel_base}; 91 const std::size_t out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
92 const std::size_t pixel_index{out_x + pixel_base};
84 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 93 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
85 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; 94 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
86 std::memcpy(data_ptrs[0], data_ptrs[1], copy_size); 95 std::memcpy(data_ptrs[0], data_ptrs[1], copy_size);
@@ -88,6 +97,17 @@ void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_da
88 } 97 }
89} 98}
90 99
100void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
101 u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height) {
102 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) {
103 FastSwizzleData(width, height, bytes_per_pixel, out_bytes_per_pixel, swizzled_data,
104 unswizzled_data, unswizzle, block_height);
105 } else {
106 LegacySwizzleData(width, height, bytes_per_pixel, out_bytes_per_pixel, swizzled_data,
107 unswizzled_data, unswizzle, block_height);
108 }
109}
110
91u32 BytesPerPixel(TextureFormat format) { 111u32 BytesPerPixel(TextureFormat format) {
92 switch (format) { 112 switch (format) {
93 case TextureFormat::DXT1: 113 case TextureFormat::DXT1:
@@ -134,13 +154,8 @@ u32 BytesPerPixel(TextureFormat format) {
134std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, 154std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
135 u32 height, u32 block_height) { 155 u32 height, u32 block_height) {
136 std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); 156 std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
137 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) { 157 CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel,
138 FastSwizzleData(width / tile_size, height / tile_size, bytes_per_pixel, 158 Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
139 Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
140 } else {
141 CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel,
142 Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
143 }
144 return unswizzled_data; 159 return unswizzled_data;
145} 160}
146 161