summary | refs | log | tree | commit | diff
path: root/src/video_core/textures/decoders.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/textures/decoders.cpp')
-rw-r--r-- src/video_core/textures/decoders.cpp 60
1 files changed, 45 insertions, 15 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 9b6b8527b..913f8ebcb 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -15,6 +15,24 @@
15 15
16namespace Tegra::Texture { 16namespace Tegra::Texture {
17namespace { 17namespace {
/// Software parallel bit deposit with a compile-time mask (same effect as the x86 BMI2 PDEP
/// instruction, but usable in constant expressions).
///
/// The low-order bits of `value` are scattered, lowest first, into the positions of the set bits
/// of `mask`: bit 0 of `value` lands on the lowest set bit of `mask`, bit 1 on the next one, and
/// so on. Bits of `value` beyond the number of set bits in `mask` are ignored.
///
/// @tparam mask  Bit positions that may receive a bit of `value`.
/// @param value  Source bits, consumed from the least significant bit upwards.
/// @return `value` deposited into `mask`; every bit outside `mask` is zero.
template <u32 mask>
constexpr u32 pdep(u32 value) {
    u32 deposited = 0;
    u32 source_bit = 1;
    u32 remaining = mask;
    while (remaining != 0) {
        // Isolate the lowest mask bit not yet handled (two's-complement negate, spelled out so
        // no unary minus is applied to an unsigned value).
        const u32 lowest = remaining & (~remaining + 1);
        if ((value & source_bit) != 0) {
            deposited |= lowest;
        }
        // Retire that mask bit and move on to the next source bit.
        remaining ^= lowest;
        source_bit <<= 1;
    }
    return deposited;
}
29
30template <u32 mask, u32 incr_amount>
31void incrpdep(u32& value) {
32 constexpr u32 swizzled_incr = pdep<mask>(incr_amount);
33 value = ((value | ~mask) + swizzled_incr) & mask;
34}
35
18template <bool TO_LINEAR, u32 BYTES_PER_PIXEL> 36template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
19void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth, 37void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth,
20 u32 block_height, u32 block_depth, u32 stride_alignment) { 38 u32 block_height, u32 block_depth, u32 stride_alignment) {
@@ -44,18 +62,20 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
44 ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); 62 ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
45 for (u32 line = 0; line < height; ++line) { 63 for (u32 line = 0; line < height; ++line) {
46 const u32 y = line + origin_y; 64 const u32 y = line + origin_y;
47 const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; 65 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y);
48 66
49 const u32 block_y = y >> GOB_SIZE_Y_SHIFT; 67 const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
50 const u32 offset_y = (block_y >> block_height) * block_size + 68 const u32 offset_y = (block_y >> block_height) * block_size +
51 ((block_y & block_height_mask) << GOB_SIZE_SHIFT); 69 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
52 70
53 for (u32 column = 0; column < width; ++column) { 71 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
72 for (u32 column = 0; column < width;
73 ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
54 const u32 x = (column + origin_x) * BYTES_PER_PIXEL; 74 const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
55 const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; 75 const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
56 76
57 const u32 base_swizzled_offset = offset_z + offset_y + offset_x; 77 const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
58 const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X]; 78 const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y);
59 79
60 const u32 unswizzled_offset = 80 const u32 unswizzled_offset =
61 slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL; 81 slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
@@ -103,12 +123,15 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
103 const u32 gob_address_y = 123 const u32 gob_address_y =
104 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + 124 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
105 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 125 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
106 const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; 126
107 for (u32 x = 0; x < subrect_width; ++x) { 127 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(dst_y);
128 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(offset_x * BYTES_PER_PIXEL);
129 for (u32 x = 0; x < subrect_width;
130 ++x, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
108 const u32 dst_x = x + offset_x; 131 const u32 dst_x = x + offset_x;
109 const u32 gob_address = 132 const u32 gob_address =
110 gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height; 133 gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height;
111 const u32 swizzled_offset = gob_address + table[(dst_x * BYTES_PER_PIXEL) % GOB_SIZE_X]; 134 const u32 swizzled_offset = gob_address + (swizzled_x | swizzled_y);
112 const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL; 135 const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL;
113 136
114 const u8* const source_line = unswizzled_data + unswizzled_offset; 137 const u8* const source_line = unswizzled_data + unswizzled_offset;
@@ -130,16 +153,19 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
130 153
131 for (u32 line = 0; line < line_count; ++line) { 154 for (u32 line = 0; line < line_count; ++line) {
132 const u32 src_y = line + origin_y; 155 const u32 src_y = line + origin_y;
133 const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; 156 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(src_y);
134 157
135 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; 158 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
136 const u32 src_offset_y = (block_y >> block_height) * block_size + 159 const u32 src_offset_y = (block_y >> block_height) * block_size +
137 ((block_y & block_height_mask) << GOB_SIZE_SHIFT); 160 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
138 for (u32 column = 0; column < line_length_in; ++column) { 161
162 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
163 for (u32 column = 0; column < line_length_in;
164 ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
139 const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL; 165 const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL;
140 const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift; 166 const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
141 167
142 const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X]; 168 const u32 swizzled_offset = src_offset_y + src_offset_x + (swizzled_x | swizzled_y);
143 const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL; 169 const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL;
144 170
145 std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL); 171 std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL);
@@ -162,13 +188,15 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
162 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; 188 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
163 189
164 for (u32 line = 0; line < line_count; ++line) { 190 for (u32 line = 0; line < line_count; ++line) {
165 const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y]; 191 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(line);
166 const u32 block_y = line / GOB_SIZE_Y; 192 const u32 block_y = line / GOB_SIZE_Y;
167 const u32 dst_offset_y = 193 const u32 dst_offset_y =
168 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; 194 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
169 for (u32 x = 0; x < line_length_in; ++x) { 195
196 u32 swizzled_x = 0;
197 for (u32 x = 0; x < line_length_in; ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) {
170 const u32 dst_offset = 198 const u32 dst_offset =
171 ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X]; 199 ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + (swizzled_x | swizzled_y);
172 const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch; 200 const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch;
173 std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL); 201 std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL);
174 } 202 }
@@ -267,11 +295,13 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
267 const std::size_t gob_address_y = 295 const std::size_t gob_address_y =
268 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + 296 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
269 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 297 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
270 const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; 298 const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(static_cast<u32>(y));
271 for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { 299 u32 swizzled_x = pdep<SWIZZLE_X_BITS>(dst_x);
300 for (std::size_t x = dst_x; x < width && count < copy_size;
301 ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) {
272 const std::size_t gob_address = 302 const std::size_t gob_address =
273 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; 303 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
274 const std::size_t swizzled_offset = gob_address + table[x % GOB_SIZE_X]; 304 const std::size_t swizzled_offset = gob_address + (swizzled_x | swizzled_y);
275 const u8* source_line = source_data + count; 305 const u8* source_line = source_data + count;
276 u8* dest_addr = swizzle_data + swizzled_offset; 306 u8* dest_addr = swizzle_data + swizzled_offset;
277 count++; 307 count++;