summary | refs | log | tree | commit | diff
path: root/src/video_core/textures/decoders.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/textures/decoders.cpp')
-rw-r--r-- src/video_core/textures/decoders.cpp | 225
1 files changed, 75 insertions, 150 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 913f8ebcb..fcc636e0b 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -89,6 +89,69 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
89 } 89 }
90} 90}
91 91
// Copies a rectangular sub-region, slice by slice, between a swizzled buffer and a pitch-linear
// buffer, moving one pixel (BYTES_PER_PIXEL bytes) per memcpy.
//
// TO_LINEAR selects which side is which: when true, `output` is indexed with the swizzled offset
// and `input` with the pitch-linear offset; when false it is the other way around.
// NOTE(review): that reads inverted relative to the template parameter's name — confirm naming.
//
// width, height  - image size in pixels / lines; used to derive stride, block_size, slice_size.
// depth          - number of slices visited by the outer loop.
// origin_x/y     - position (pixels / lines) of the sub-region inside the swizzled image.
// extent_x       - pixels copied per line.
// num_lines      - total line budget, shared by all slices.
// block_height, block_depth - used as shift amounts (log2 values) in the arithmetic below.
// pitch_linear   - byte distance between consecutive lines on the pitch-linear side.
92template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
93void SwizzleSubrectImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height,
94 u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 num_lines,
95 u32 block_height, u32 block_depth, u32 pitch_linear) {
96 // The origin of the transformation can be configured here, leave it as zero as the current API
97 // doesn't expose it.
98 static constexpr u32 origin_z = 0;
99
100 // Byte pitch of the pitch-linear side of the copy.
101 // It is supplied by the caller through the 'pitch_linear' parameter.
102 const u32 pitch = pitch_linear;
    // Row width in bytes passed through Common::AlignUpLog2 (presumably rounds up to a multiple
    // of 1 << GOB_SIZE_X_SHIFT — helper is defined outside this view).
103 const u32 stride = Common::AlignUpLog2(width * BYTES_PER_PIXEL, GOB_SIZE_X_SHIFT);
104
    // block_size and slice_size are the byte strides used below for block rows and depth blocks
    // of the swizzled side.
105 const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
106 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
107 const u32 slice_size =
108 Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
109
110 const u32 block_height_mask = (1U << block_height) - 1;
111 const u32 block_depth_mask = (1U << block_depth) - 1;
112 const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
113
    // num_lines is consumed across slices; extent_y caps how many lines a single slice may take.
    // NOTE(review): 'height - origin_y' would wrap if origin_y > height (assuming u32 is an
    // unsigned type) — confirm callers guarantee origin_y <= height.
114 u32 unprocessed_lines = num_lines;
115 u32 extent_y = std::min(num_lines, height - origin_y);
116
117 for (u32 slice = 0; slice < depth; ++slice) {
118 const u32 z = slice + origin_z;
        // Swizzled byte offset contributed by the slice index.
119 const u32 offset_z = (z >> block_depth) * slice_size +
120 ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
121 const u32 lines_in_y = std::min(unprocessed_lines, extent_y);
122 for (u32 line = 0; line < lines_in_y; ++line) {
123 const u32 y = line + origin_y;
            // pdep is defined outside this view; presumably deposits the bits of y into the
            // positions selected by SWIZZLE_Y_BITS.
124 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y);
125
            // Swizzled byte offset contributed by the row: block row plus position inside it.
126 const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
127 const u32 offset_y = (block_y >> block_height) * block_size +
128 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
129
            // swizzled_x starts at the origin column and is advanced once per pixel by incrpdep
            // in the loop header instead of being recomputed from scratch.
130 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
131 for (u32 column = 0; column < extent_x;
132 ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
133 const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
134 const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
135
136 const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
137 const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y);
138
                // The pitch-linear side is addressed relative to the sub-region (slice, line,
                // column); the swizzled side above uses absolute image coordinates (z, y, x).
139 const u32 unswizzled_offset =
140 slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
141
142 u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
143 const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
144
145 std::memcpy(dst, src, BYTES_PER_PIXEL);
146 }
147 }
        // Stop as soon as the line budget is spent, even if slices remain.
148 unprocessed_lines -= lines_in_y;
149 if (unprocessed_lines == 0) {
150 return;
151 }
152 }
153}
154
92template <bool TO_LINEAR> 155template <bool TO_LINEAR>
93void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, 156void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
94 u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { 157 u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
@@ -111,97 +174,6 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
111 } 174 }
112} 175}
113 176
// Writes a subrect_width x subrect_height pixel rectangle from a pitch-linear source into a
// swizzled destination, placing it at (offset_x, offset_y) of the destination image.
//
// source_pitch     - byte distance between consecutive source lines.
// swizzled_width   - destination image width in pixels.
// block_height_bit - log2 of the block height (block_height = 1 << block_height_bit).
// swizzled_data / unswizzled_data - raw destination / source pointers; no bounds are checked here.
114template <u32 BYTES_PER_PIXEL>
115void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
116 u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit,
117 u32 offset_x, u32 offset_y) {
118 const u32 block_height = 1U << block_height_bit;
    // Destination row width counted in GOB_SIZE_X-byte units, rounded up.
119 const u32 image_width_in_gobs =
120 (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
121 for (u32 line = 0; line < subrect_height; ++line) {
122 const u32 dst_y = line + offset_y;
        // Row contribution to the destination address: the block row containing dst_y, plus the
        // GOB_SIZE_Y-line group of dst_y inside that block.
123 const u32 gob_address_y =
124 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
125 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
126
        // pdep / incrpdep are defined outside this view; swizzled_x is seeded at the origin column
        // and advanced once per pixel in the loop header.
127 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(dst_y);
128 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(offset_x * BYTES_PER_PIXEL);
129 for (u32 x = 0; x < subrect_width;
130 ++x, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
131 const u32 dst_x = x + offset_x;
132 const u32 gob_address =
133 gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height;
134 const u32 swizzled_offset = gob_address + (swizzled_x | swizzled_y);
            // The source is addressed relative to the rectangle (line, x), not the image origin.
135 const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL;
136
137 const u8* const source_line = unswizzled_data + unswizzled_offset;
138 u8* const dest_addr = swizzled_data + swizzled_offset;
139 std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL);
140 }
141 }
142}
143
// Reads a line_length_in x line_count pixel rectangle starting at (origin_x, origin_y) of a
// swizzled image and writes it to a pitch-linear destination.
//
// pitch        - byte distance between consecutive lines written to `output`.
// width        - swizzled image width in pixels.
// block_height - used as a shift amount (log2 value) in the arithmetic below.
// output / input - raw destination / source pointers; no bounds are checked here.
144template <u32 BYTES_PER_PIXEL>
145void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height,
146 u32 origin_x, u32 origin_y, u8* output, const u8* input) {
    // Source row width in bytes, then in GOB_SIZE_X-byte units (rounded up); block_size is the
    // byte stride between consecutive block rows.
147 const u32 stride = width * BYTES_PER_PIXEL;
148 const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
149 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
150
151 const u32 block_height_mask = (1U << block_height) - 1;
152 const u32 x_shift = GOB_SIZE_SHIFT + block_height;
153
154 for (u32 line = 0; line < line_count; ++line) {
155 const u32 src_y = line + origin_y;
        // pdep is defined outside this view; presumably deposits the bits of src_y into the
        // positions selected by SWIZZLE_Y_BITS.
156 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(src_y);
157
        // Row contribution to the source address: block row plus position inside the block.
158 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
159 const u32 src_offset_y = (block_y >> block_height) * block_size +
160 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
161
        // swizzled_x is seeded at the origin column and advanced once per pixel by incrpdep.
162 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
163 for (u32 column = 0; column < line_length_in;
164 ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
165 const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL;
166 const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
167
168 const u32 swizzled_offset = src_offset_y + src_offset_x + (swizzled_x | swizzled_y);
            // The destination is addressed relative to the rectangle (line, column).
169 const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL;
170
171 std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL);
172 }
173 }
174}
175
// Copies line_count lines of line_length_in elements from a pitch-linear `input` (lines `pitch`
// bytes apart) into a swizzled `output` whose block size accounts for both block_height and
// block_depth (both used as shift amounts).
//
// Only a zero origin is handled: non-zero origin_x / origin_y trip UNIMPLEMENTED_IF.
// The `height` parameter is not referenced in the body.
176template <u32 BYTES_PER_PIXEL>
177void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
178 u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output,
179 const u8* input) {
180 UNIMPLEMENTED_IF(origin_x > 0);
181 UNIMPLEMENTED_IF(origin_y > 0);
182
    // Destination row width in bytes, then in GOB_SIZE_X-byte units (rounded up); block_size is
    // the byte stride between consecutive block rows.
183 const u32 stride = width * BYTES_PER_PIXEL;
184 const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
185 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
186
187 const u32 block_height_mask = (1U << block_height) - 1;
188 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
189
190 for (u32 line = 0; line < line_count; ++line) {
191 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(line);
192 const u32 block_y = line / GOB_SIZE_Y;
193 const u32 dst_offset_y =
194 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
195
        // NOTE(review): the destination side treats x like a byte index (incrpdep is instantiated
        // with 1 and the column group is x / GOB_SIZE_X), while the source offset scales x by
        // BYTES_PER_PIXEL — confirm this is intended when BYTES_PER_PIXEL > 1.
196 u32 swizzled_x = 0;
197 for (u32 x = 0; x < line_length_in; ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) {
198 const u32 dst_offset =
199 ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + (swizzled_x | swizzled_y);
200 const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch;
201 std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL);
202 }
203 }
204}
205} // Anonymous namespace 177} // Anonymous namespace
206 178
207void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, 179void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
@@ -218,15 +190,15 @@ void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_p
218 stride_alignment); 190 stride_alignment);
219} 191}
220 192
221void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 193void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
222 u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, 194 u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y,
223 u32 block_height_bit, u32 offset_x, u32 offset_y) { 195 u32 block_height, u32 block_depth, u32 pitch_linear) {
224 switch (bytes_per_pixel) { 196 switch (bytes_per_pixel) {
225#define BPP_CASE(x) \ 197#define BPP_CASE(x) \
226 case x: \ 198 case x: \
227 return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width, \ 199 return SwizzleSubrectImpl<true, x>(output, input, width, height, depth, origin_x, \
228 swizzled_data, unswizzled_data, block_height_bit, offset_x, \ 200 origin_y, extent_x, extent_y, block_height, \
229 offset_y); 201 block_depth, pitch_linear);
230 BPP_CASE(1) 202 BPP_CASE(1)
231 BPP_CASE(2) 203 BPP_CASE(2)
232 BPP_CASE(3) 204 BPP_CASE(3)
@@ -241,13 +213,15 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
241 } 213 }
242} 214}
243 215
244void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, 216void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
245 u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { 217 u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x,
218 u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear) {
246 switch (bytes_per_pixel) { 219 switch (bytes_per_pixel) {
247#define BPP_CASE(x) \ 220#define BPP_CASE(x) \
248 case x: \ 221 case x: \
249 return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height, \ 222 return SwizzleSubrectImpl<false, x>(output, input, width, height, depth, origin_x, \
250 origin_x, origin_y, output, input); 223 origin_y, extent_x, extent_y, block_height, \
224 block_depth, pitch_linear);
251 BPP_CASE(1) 225 BPP_CASE(1)
252 BPP_CASE(2) 226 BPP_CASE(2)
253 BPP_CASE(3) 227 BPP_CASE(3)
@@ -262,55 +236,6 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
262 } 236 }
263} 237}
264 238
// Runtime dispatcher: forwards all arguments to the SwizzleSliceToVoxel<N> template instantiation
// matching bytes_per_pixel. Supported values are 1, 2, 3, 4, 6, 8, 12 and 16; any other value
// reaches the default label and triggers ASSERT_MSG.
265void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
266 u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
267 u32 origin_y, u8* output, const u8* input) {
268 switch (bytes_per_pixel) {
// BPP_CASE(x) expands to a `case x:` that returns the call to the <x> instantiation; the macro is
// local to this switch and undefined again right after the case list.
269#define BPP_CASE(x) \
270 case x: \
271 return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height, \
272 block_height, block_depth, origin_x, origin_y, output, \
273 input);
274 BPP_CASE(1)
275 BPP_CASE(2)
276 BPP_CASE(3)
277 BPP_CASE(4)
278 BPP_CASE(6)
279 BPP_CASE(8)
280 BPP_CASE(12)
281 BPP_CASE(16)
282#undef BPP_CASE
283 default:
284 ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
285 }
286}
287
// Copies up to copy_size bytes, one byte at a time, from the sequential buffer `source_data` into
// swizzled positions of `swizzle_data`.
//
// Rows run from dst_y up to (not including) height; on every row x runs from dst_x up to (not
// including) width. `width` is used directly as a byte count (no bytes-per-pixel scaling).
// block_height_bit is the log2 of the block height. Copying stops as soon as `count` reaches
// copy_size, which both loop conditions check.
288void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
289 const u32 block_height_bit, const std::size_t copy_size, const u8* source_data,
290 u8* swizzle_data) {
291 const u32 block_height = 1U << block_height_bit;
    // Row width counted in GOB_SIZE_X-byte units, rounded up.
292 const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X};
    // Number of bytes consumed from source_data so far.
293 std::size_t count = 0;
294 for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
        // Row contribution to the destination address: the block row containing y, plus the
        // GOB_SIZE_Y-line group of y inside that block.
295 const std::size_t gob_address_y =
296 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
297 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
298 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(static_cast<u32>(y));
        // Every row restarts at column dst_x; swizzled_x is then advanced per byte by incrpdep
        // (defined outside this view).
299 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(dst_x);
300 for (std::size_t x = dst_x; x < width && count < copy_size;
301 ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) {
302 const std::size_t gob_address =
303 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
304 const std::size_t swizzled_offset = gob_address + (swizzled_x | swizzled_y);
            // The source is read strictly sequentially, independent of x and y.
305 const u8* source_line = source_data + count;
306 u8* dest_addr = swizzle_data + swizzled_offset;
307 count++;
308
309 *dest_addr = *source_line;
310 }
311 }
312}
313
314std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 239std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
315 u32 block_height, u32 block_depth) { 240 u32 block_height, u32 block_depth) {
316 if (tiled) { 241 if (tiled) {