summaryrefslogtreecommitdiff
path: root/src/video_core/textures/decoders.cpp
diff options
context:
space:
mode:
authorGravatar Fernando S2022-10-06 21:29:53 +0200
committerGravatar GitHub2022-10-06 21:29:53 +0200
commit1effa578f12f79d7816e3543291f302f126cc1d2 (patch)
tree14803b31b6817294d40d57446f6fa94c5ff3fe9a /src/video_core/textures/decoders.cpp
parentMerge pull request #9025 from FernandoS27/slava-ukrayini (diff)
parentvulkan_blitter: Fix pool allocation double free. (diff)
downloadyuzu-1effa578f12f79d7816e3543291f302f126cc1d2.tar.gz
yuzu-1effa578f12f79d7816e3543291f302f126cc1d2.tar.xz
yuzu-1effa578f12f79d7816e3543291f302f126cc1d2.zip
Merge pull request #8467 from FernandoS27/yfc-rel-1
Project yuzu Fried Chicken (Y.F.C.) Part 1
Diffstat (limited to 'src/video_core/textures/decoders.cpp')
-rw-r--r--src/video_core/textures/decoders.cpp240
1 files changed, 86 insertions, 154 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 913f8ebcb..52d067a2d 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -35,7 +35,7 @@ void incrpdep(u32& value) {
35 35
36template <bool TO_LINEAR, u32 BYTES_PER_PIXEL> 36template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
37void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth, 37void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth,
38 u32 block_height, u32 block_depth, u32 stride_alignment) { 38 u32 block_height, u32 block_depth, u32 stride) {
39 // The origin of the transformation can be configured here, leave it as zero as the current API 39 // The origin of the transformation can be configured here, leave it as zero as the current API
40 // doesn't expose it. 40 // doesn't expose it.
41 static constexpr u32 origin_x = 0; 41 static constexpr u32 origin_x = 0;
@@ -45,7 +45,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
45 // We can configure here a custom pitch 45 // We can configure here a custom pitch
46 // As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch. 46 // As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
47 const u32 pitch = width * BYTES_PER_PIXEL; 47 const u32 pitch = width * BYTES_PER_PIXEL;
48 const u32 stride = Common::AlignUpLog2(width, stride_alignment) * BYTES_PER_PIXEL;
49 48
50 const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); 49 const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
51 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); 50 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
@@ -89,6 +88,69 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
89 } 88 }
90} 89}
91 90
91template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
92void SwizzleSubrectImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height,
93 u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 num_lines,
94 u32 block_height, u32 block_depth, u32 pitch_linear) {
95 // The origin of the transformation can be configured here, leave it as zero as the current API
96 // doesn't expose it.
97 static constexpr u32 origin_z = 0;
98
99 // We can configure here a custom pitch
100 // As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
101 const u32 pitch = pitch_linear;
102 const u32 stride = Common::AlignUpLog2(width * BYTES_PER_PIXEL, GOB_SIZE_X_SHIFT);
103
104 const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
105 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
106 const u32 slice_size =
107 Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
108
109 const u32 block_height_mask = (1U << block_height) - 1;
110 const u32 block_depth_mask = (1U << block_depth) - 1;
111 const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
112
113 u32 unprocessed_lines = num_lines;
114 u32 extent_y = std::min(num_lines, height - origin_y);
115
116 for (u32 slice = 0; slice < depth; ++slice) {
117 const u32 z = slice + origin_z;
118 const u32 offset_z = (z >> block_depth) * slice_size +
119 ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
120 const u32 lines_in_y = std::min(unprocessed_lines, extent_y);
121 for (u32 line = 0; line < lines_in_y; ++line) {
122 const u32 y = line + origin_y;
123 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y);
124
125 const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
126 const u32 offset_y = (block_y >> block_height) * block_size +
127 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
128
129 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
130 for (u32 column = 0; column < extent_x;
131 ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
132 const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
133 const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
134
135 const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
136 const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y);
137
138 const u32 unswizzled_offset =
139 slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
140
141 u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
142 const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
143
144 std::memcpy(dst, src, BYTES_PER_PIXEL);
145 }
146 }
147 unprocessed_lines -= lines_in_y;
148 if (unprocessed_lines == 0) {
149 return;
150 }
151 }
152}
153
92template <bool TO_LINEAR> 154template <bool TO_LINEAR>
93void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, 155void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
94 u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { 156 u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
@@ -111,122 +173,39 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
111 } 173 }
112} 174}
113 175
114template <u32 BYTES_PER_PIXEL>
115void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
116 u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit,
117 u32 offset_x, u32 offset_y) {
118 const u32 block_height = 1U << block_height_bit;
119 const u32 image_width_in_gobs =
120 (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
121 for (u32 line = 0; line < subrect_height; ++line) {
122 const u32 dst_y = line + offset_y;
123 const u32 gob_address_y =
124 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
125 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
126
127 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(dst_y);
128 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(offset_x * BYTES_PER_PIXEL);
129 for (u32 x = 0; x < subrect_width;
130 ++x, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
131 const u32 dst_x = x + offset_x;
132 const u32 gob_address =
133 gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height;
134 const u32 swizzled_offset = gob_address + (swizzled_x | swizzled_y);
135 const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL;
136
137 const u8* const source_line = unswizzled_data + unswizzled_offset;
138 u8* const dest_addr = swizzled_data + swizzled_offset;
139 std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL);
140 }
141 }
142}
143
144template <u32 BYTES_PER_PIXEL>
145void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height,
146 u32 origin_x, u32 origin_y, u8* output, const u8* input) {
147 const u32 stride = width * BYTES_PER_PIXEL;
148 const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
149 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
150
151 const u32 block_height_mask = (1U << block_height) - 1;
152 const u32 x_shift = GOB_SIZE_SHIFT + block_height;
153
154 for (u32 line = 0; line < line_count; ++line) {
155 const u32 src_y = line + origin_y;
156 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(src_y);
157
158 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
159 const u32 src_offset_y = (block_y >> block_height) * block_size +
160 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
161
162 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
163 for (u32 column = 0; column < line_length_in;
164 ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
165 const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL;
166 const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
167
168 const u32 swizzled_offset = src_offset_y + src_offset_x + (swizzled_x | swizzled_y);
169 const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL;
170
171 std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL);
172 }
173 }
174}
175
176template <u32 BYTES_PER_PIXEL>
177void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
178 u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output,
179 const u8* input) {
180 UNIMPLEMENTED_IF(origin_x > 0);
181 UNIMPLEMENTED_IF(origin_y > 0);
182
183 const u32 stride = width * BYTES_PER_PIXEL;
184 const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
185 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
186
187 const u32 block_height_mask = (1U << block_height) - 1;
188 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
189
190 for (u32 line = 0; line < line_count; ++line) {
191 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(line);
192 const u32 block_y = line / GOB_SIZE_Y;
193 const u32 dst_offset_y =
194 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
195
196 u32 swizzled_x = 0;
197 for (u32 x = 0; x < line_length_in; ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) {
198 const u32 dst_offset =
199 ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + (swizzled_x | swizzled_y);
200 const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch;
201 std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL);
202 }
203 }
204}
205} // Anonymous namespace 176} // Anonymous namespace
206 177
207void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, 178void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
208 u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, 179 u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
209 u32 stride_alignment) { 180 u32 stride_alignment) {
181 const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
182 const u32 new_bpp = std::min(4U, static_cast<u32>(std::countr_zero(width * bytes_per_pixel)));
183 width = (width * bytes_per_pixel) >> new_bpp;
184 bytes_per_pixel = 1U << new_bpp;
210 Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, 185 Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
211 stride_alignment); 186 stride);
212} 187}
213 188
214void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, 189void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
215 u32 height, u32 depth, u32 block_height, u32 block_depth, 190 u32 height, u32 depth, u32 block_height, u32 block_depth,
216 u32 stride_alignment) { 191 u32 stride_alignment) {
192 const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
193 const u32 new_bpp = std::min(4U, static_cast<u32>(std::countr_zero(width * bytes_per_pixel)));
194 width = (width * bytes_per_pixel) >> new_bpp;
195 bytes_per_pixel = 1U << new_bpp;
217 Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, 196 Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
218 stride_alignment); 197 stride);
219} 198}
220 199
221void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 200void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
222 u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, 201 u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y,
223 u32 block_height_bit, u32 offset_x, u32 offset_y) { 202 u32 block_height, u32 block_depth, u32 pitch_linear) {
224 switch (bytes_per_pixel) { 203 switch (bytes_per_pixel) {
225#define BPP_CASE(x) \ 204#define BPP_CASE(x) \
226 case x: \ 205 case x: \
227 return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width, \ 206 return SwizzleSubrectImpl<true, x>(output, input, width, height, depth, origin_x, \
228 swizzled_data, unswizzled_data, block_height_bit, offset_x, \ 207 origin_y, extent_x, extent_y, block_height, \
229 offset_y); 208 block_depth, pitch_linear);
230 BPP_CASE(1) 209 BPP_CASE(1)
231 BPP_CASE(2) 210 BPP_CASE(2)
232 BPP_CASE(3) 211 BPP_CASE(3)
@@ -241,13 +220,15 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
241 } 220 }
242} 221}
243 222
244void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, 223void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
245 u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { 224 u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x,
225 u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear) {
246 switch (bytes_per_pixel) { 226 switch (bytes_per_pixel) {
247#define BPP_CASE(x) \ 227#define BPP_CASE(x) \
248 case x: \ 228 case x: \
249 return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height, \ 229 return SwizzleSubrectImpl<false, x>(output, input, width, height, depth, origin_x, \
250 origin_x, origin_y, output, input); 230 origin_y, extent_x, extent_y, block_height, \
231 block_depth, pitch_linear);
251 BPP_CASE(1) 232 BPP_CASE(1)
252 BPP_CASE(2) 233 BPP_CASE(2)
253 BPP_CASE(3) 234 BPP_CASE(3)
@@ -262,55 +243,6 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
262 } 243 }
263} 244}
264 245
265void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
266 u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
267 u32 origin_y, u8* output, const u8* input) {
268 switch (bytes_per_pixel) {
269#define BPP_CASE(x) \
270 case x: \
271 return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height, \
272 block_height, block_depth, origin_x, origin_y, output, \
273 input);
274 BPP_CASE(1)
275 BPP_CASE(2)
276 BPP_CASE(3)
277 BPP_CASE(4)
278 BPP_CASE(6)
279 BPP_CASE(8)
280 BPP_CASE(12)
281 BPP_CASE(16)
282#undef BPP_CASE
283 default:
284 ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
285 }
286}
287
288void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
289 const u32 block_height_bit, const std::size_t copy_size, const u8* source_data,
290 u8* swizzle_data) {
291 const u32 block_height = 1U << block_height_bit;
292 const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X};
293 std::size_t count = 0;
294 for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
295 const std::size_t gob_address_y =
296 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
297 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
298 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(static_cast<u32>(y));
299 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(dst_x);
300 for (std::size_t x = dst_x; x < width && count < copy_size;
301 ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) {
302 const std::size_t gob_address =
303 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
304 const std::size_t swizzled_offset = gob_address + (swizzled_x | swizzled_y);
305 const u8* source_line = source_data + count;
306 u8* dest_addr = swizzle_data + swizzled_offset;
307 count++;
308
309 *dest_addr = *source_line;
310 }
311 }
312}
313
314std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 246std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
315 u32 block_height, u32 block_depth) { 247 u32 block_height, u32 block_depth) {
316 if (tiled) { 248 if (tiled) {