summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar FernandoS272018-10-11 17:08:48 -0400
committerGravatar FernandoS272018-10-13 15:25:17 -0400
commitd4ae43f9c1dd1b366cf71520841d5f2f051ce69d (patch)
tree177872073377d252d33c27857d9ac48394f1c757 /src
parentImplement Precise 3D Swizzle (diff)
downloadyuzu-d4ae43f9c1dd1b366cf71520841d5f2f051ce69d.tar.gz
yuzu-d4ae43f9c1dd1b366cf71520841d5f2f051ce69d.tar.xz
yuzu-d4ae43f9c1dd1b366cf71520841d5f2f051ce69d.zip
Remove old Swizzle algorithms and use 3d Swizzle
Diffstat (limited to 'src')
-rw-r--r--src/video_core/textures/decoders.cpp162
1 files changed, 69 insertions, 93 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index d6750b174..5e2d3ac32 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -40,97 +40,56 @@ struct alignas(64) SwizzleTable {
40constexpr auto legacy_swizzle_table = SwizzleTable<8, 64, 1>(); 40constexpr auto legacy_swizzle_table = SwizzleTable<8, 64, 1>();
41constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>(); 41constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>();
42 42
43static void LegacySwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, 43/**
44 u8* swizzled_data, u8* unswizzled_data, bool unswizzle, 44 * This function manages ALL the GOBs (Group of Bytes) inside a single block.
45 u32 block_height) { 45 * Instead of going gob by gob, we map the coordinates inside a block and manage from
46 std::array<u8*, 2> data_ptrs; 46 * those. Block_Width is assumed to be 1.
47 const std::size_t stride = width * bytes_per_pixel; 47 */
48 const std::size_t gobs_in_x = 64; 48void Precise3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
49 const std::size_t gobs_in_y = 8; 49 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
50 const std::size_t gobs_size = gobs_in_x * gobs_in_y; 50 const u32 y_end, const u32 z_end, const u32 tile_offset,
51 const std::size_t image_width_in_gobs{(stride + gobs_in_x - 1) / gobs_in_x}; 51 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
52 for (std::size_t y = 0; y < height; ++y) { 52 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
53 const std::size_t gob_y_address =
54 (y / (gobs_in_y * block_height)) * gobs_size * block_height * image_width_in_gobs +
55 (y % (gobs_in_y * block_height) / gobs_in_y) * gobs_size;
56 const auto& table = legacy_swizzle_table[y % gobs_in_y];
57 for (std::size_t x = 0; x < width; ++x) {
58 const std::size_t gob_address =
59 gob_y_address + (x * bytes_per_pixel / gobs_in_x) * gobs_size * block_height;
60 const std::size_t x2 = x * bytes_per_pixel;
61 const std::size_t swizzle_offset = gob_address + table[x2 % gobs_in_x];
62 const std::size_t pixel_index = (x + y * width) * out_bytes_per_pixel;
63
64 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
65 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
66
67 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
68 }
69 }
70}
71
72static void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
73 u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
74 u32 block_height) {
75 std::array<u8*, 2> data_ptrs;
76 const std::size_t stride{width * bytes_per_pixel};
77 const std::size_t gobs_in_x = 64;
78 const std::size_t gobs_in_y = 8;
79 const std::size_t gobs_size = gobs_in_x * gobs_in_y;
80 const std::size_t image_width_in_gobs{(stride + gobs_in_x - 1) / gobs_in_x};
81 const std::size_t copy_size{16};
82 for (std::size_t y = 0; y < height; ++y) {
83 const std::size_t initial_gob =
84 (y / (gobs_in_y * block_height)) * gobs_size * block_height * image_width_in_gobs +
85 (y % (gobs_in_y * block_height) / gobs_in_y) * gobs_size;
86 const std::size_t pixel_base{y * width * out_bytes_per_pixel};
87 const auto& table = fast_swizzle_table[y % gobs_in_y];
88 for (std::size_t xb = 0; xb < stride; xb += copy_size) {
89 const std::size_t gob_address{initial_gob +
90 (xb / gobs_in_x) * gobs_size * block_height};
91 const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]};
92 const std::size_t out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
93 const std::size_t pixel_index{out_x + pixel_base};
94 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
95 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
96 std::memcpy(data_ptrs[0], data_ptrs[1], copy_size);
97 }
98 }
99}
100
101void Precise3DProcessGobs(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, const u32 x_start,
102 const u32 y_start, const u32 z_start, const u32 x_end, const u32 y_end,
103 const u32 z_end, const u32 tile_offset, const u32 xy_block_size,
104 const u32 layer_z, const u32 stride_x, const u32 bytes_per_pixel,
105 const u32 out_bytes_per_pixel) {
106 std::array<u8*, 2> data_ptrs; 53 std::array<u8*, 2> data_ptrs;
107 u32 z_adress = tile_offset; 54 u32 z_adress = tile_offset;
108 const u32 gob_size = 64 * 8 * 1; 55 const u32 gob_size_x = 64;
56 const u32 gob_size_y = 8;
57 const u32 gob_size_z = 1;
58 const u32 gob_size = gob_size_x * gob_size_y * gob_size_z;
109 for (u32 z = z_start; z < z_end; z++) { 59 for (u32 z = z_start; z < z_end; z++) {
110 u32 y_adress = z_adress; 60 u32 y_adress = z_adress;
111 u32 pixel_base = layer_z * z + y_start * stride_x; 61 u32 pixel_base = layer_z * z + y_start * stride_x;
112 for (u32 y = y_start; y < y_end; y++) { 62 for (u32 y = y_start; y < y_end; y++) {
113 const auto& table = legacy_swizzle_table[y % 8]; 63 const auto& table = legacy_swizzle_table[y % gob_size_y];
114 for (u32 x = x_start; x < x_end; x++) { 64 for (u32 x = x_start; x < x_end; x++) {
115 const u32 swizzle_offset{y_adress + table[x * bytes_per_pixel % 64]}; 65 const u32 swizzle_offset{y_adress + table[x * bytes_per_pixel % gob_size_x]};
116 const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; 66 const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
117 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 67 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
118 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; 68 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
119 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); 69 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
120 } 70 }
121 pixel_base += stride_x; 71 pixel_base += stride_x;
122 if ((y + 1) % 8 == 0) 72 if ((y + 1) % gob_size_y == 0)
123 y_adress += gob_size; 73 y_adress += gob_size;
124 } 74 }
125 z_adress += xy_block_size; 75 z_adress += xy_block_size;
126 } 76 }
127} 77}
128 78
129void Precise3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 width, 79/**
130 u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel, 80 * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Texture.
131 u32 block_height, u32 block_depth) { 81 * The body of this function takes care of splitting the swizzled texture into blocks,
132 auto div_ceil = [](u32 x, u32 y) { return ((x + y - 1) / y); }; 82 * and managing the extents of it. Once all the parameters of a single block are obtained,
133 83 * the function calls '3DProcessBlock' to process that particular Block.
84 *
85 * Documentation for the memory layout and decoding can be found at:
86 * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
87 */
88void Precise3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
89 const u32 width, const u32 height, const u32 depth,
90 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel,
91 const u32 block_height, const u32 block_depth) {
92 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
134 const u32 stride_x = width * out_bytes_per_pixel; 93 const u32 stride_x = width * out_bytes_per_pixel;
135 const u32 layer_z = height * stride_x; 94 const u32 layer_z = height * stride_x;
136 const u32 gob_x_bytes = 64; 95 const u32 gob_x_bytes = 64;
@@ -157,33 +116,41 @@ void Precise3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzl
157 for (u32 xb = 0; xb < blocks_on_x; xb++) { 116 for (u32 xb = 0; xb < blocks_on_x; xb++) {
158 const u32 x_start = xb * block_x_elements; 117 const u32 x_start = xb * block_x_elements;
159 const u32 x_end = std::min(width, x_start + block_x_elements); 118 const u32 x_end = std::min(width, x_start + block_x_elements);
160 Precise3DProcessGobs(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, 119 Precise3DProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
161 z_start, x_end, y_end, z_end, tile_offset, xy_block_size, layer_z, 120 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
162 stride_x, bytes_per_pixel, out_bytes_per_pixel); 121 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
163 tile_offset += block_size; 122 tile_offset += block_size;
164 } 123 }
165 } 124 }
166 } 125 }
167} 126}
168 127
169void Fast3DProcessGobs(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, const u32 x_start, 128/**
170 const u32 y_start, const u32 z_start, const u32 x_end, const u32 y_end, 129 * This function manages ALL the GOBs (Group of Bytes) inside a single block.
171 const u32 z_end, const u32 tile_offset, const u32 xy_block_size, 130 * Instead of going gob by gob, we map the coordinates inside a block and manage from
172 const u32 layer_z, const u32 stride_x, const u32 bytes_per_pixel, 131 * those. Block_Width is assumed to be 1.
173 const u32 out_bytes_per_pixel) { 132 */
133void Fast3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
134 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
135 const u32 y_end, const u32 z_end, const u32 tile_offset,
136 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
137 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
174 std::array<u8*, 2> data_ptrs; 138 std::array<u8*, 2> data_ptrs;
175 u32 z_adress = tile_offset; 139 u32 z_adress = tile_offset;
176 const u32 x_startb = x_start * bytes_per_pixel; 140 const u32 x_startb = x_start * bytes_per_pixel;
177 const u32 x_endb = x_end * bytes_per_pixel; 141 const u32 x_endb = x_end * bytes_per_pixel;
178 const u32 copy_size = 16; 142 const u32 copy_size = 16;
179 const u32 gob_size = 64 * 8 * 1; 143 const u32 gob_size_x = 64;
144 const u32 gob_size_y = 8;
145 const u32 gob_size_z = 1;
146 const u32 gob_size = gob_size_x * gob_size_y * gob_size_z;
180 for (u32 z = z_start; z < z_end; z++) { 147 for (u32 z = z_start; z < z_end; z++) {
181 u32 y_adress = z_adress; 148 u32 y_adress = z_adress;
182 u32 pixel_base = layer_z * z + y_start * stride_x; 149 u32 pixel_base = layer_z * z + y_start * stride_x;
183 for (u32 y = y_start; y < y_end; y++) { 150 for (u32 y = y_start; y < y_end; y++) {
184 const auto& table = fast_swizzle_table[y % 8]; 151 const auto& table = fast_swizzle_table[y % gob_size_y];
185 for (u32 xb = x_startb; xb < x_endb; xb += copy_size) { 152 for (u32 xb = x_startb; xb < x_endb; xb += copy_size) {
186 const u32 swizzle_offset{y_adress + table[(xb / 16) % 4]}; 153 const u32 swizzle_offset{y_adress + table[(xb / copy_size) % 4]};
187 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; 154 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
188 const u32 pixel_index{out_x + pixel_base}; 155 const u32 pixel_index{out_x + pixel_base};
189 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 156 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
@@ -191,18 +158,27 @@ void Fast3DProcessGobs(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, c
191 std::memcpy(data_ptrs[0], data_ptrs[1], copy_size); 158 std::memcpy(data_ptrs[0], data_ptrs[1], copy_size);
192 } 159 }
193 pixel_base += stride_x; 160 pixel_base += stride_x;
194 if ((y + 1) % 8 == 0) 161 if ((y + 1) % gob_size_y == 0)
195 y_adress += gob_size; 162 y_adress += gob_size;
196 } 163 }
197 z_adress += xy_block_size; 164 z_adress += xy_block_size;
198 } 165 }
199} 166}
200 167
201void Fast3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 width, 168/**
202 u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel, 169 * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Texture.
203 u32 block_height, u32 block_depth) { 170 * The body of this function takes care of splitting the swizzled texture into blocks,
204 auto div_ceil = [](u32 x, u32 y) { return ((x + y - 1) / y); }; 171 * and managing the extents of it. Once all the parameters of a single block are obtained,
205 172 * the function calls '3DProcessBlock' to process that particular Block.
173 *
174 * Documentation for the memory layout and decoding can be found at:
175 * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
176 */
177void Fast3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
178 const u32 width, const u32 height, const u32 depth,
179 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel,
180 const u32 block_height, const u32 block_depth) {
181 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
206 const u32 stride_x = width * out_bytes_per_pixel; 182 const u32 stride_x = width * out_bytes_per_pixel;
207 const u32 layer_z = height * stride_x; 183 const u32 layer_z = height * stride_x;
208 const u32 gob_x_bytes = 64; 184 const u32 gob_x_bytes = 64;
@@ -229,9 +205,9 @@ void Fast3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
229 for (u32 xb = 0; xb < blocks_on_x; xb++) { 205 for (u32 xb = 0; xb < blocks_on_x; xb++) {
230 const u32 x_start = xb * block_x_elements; 206 const u32 x_start = xb * block_x_elements;
231 const u32 x_end = std::min(width, x_start + block_x_elements); 207 const u32 x_end = std::min(width, x_start + block_x_elements);
232 Fast3DProcessGobs(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, 208 Fast3DProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
233 z_start, x_end, y_end, z_end, tile_offset, xy_block_size, layer_z, 209 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
234 stride_x, bytes_per_pixel, out_bytes_per_pixel); 210 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
235 tile_offset += block_size; 211 tile_offset += block_size;
236 } 212 }
237 } 213 }
@@ -245,7 +221,7 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
245 bytes_per_pixel, out_bytes_per_pixel, block_height, 1U); 221 bytes_per_pixel, out_bytes_per_pixel, block_height, 1U);
246 } else { 222 } else {
247 Precise3DSwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, 1U, 223 Precise3DSwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, 1U,
248 bytes_per_pixel, out_bytes_per_pixel, block_height, 1U); 224 bytes_per_pixel, out_bytes_per_pixel, block_height, 1U);
249 } 225 }
250} 226}
251 227