diff options
Diffstat (limited to 'src/video_core/textures/decoders.cpp')
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 173 |
1 files changed, 116 insertions, 57 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index c32ae956a..c010b9353 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -84,56 +84,31 @@ template <bool TO_LINEAR> | |||
| 84 | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, | 84 | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 85 | u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { | 85 | u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { |
| 86 | switch (bytes_per_pixel) { | 86 | switch (bytes_per_pixel) { |
| 87 | case 1: | 87 | #define BPP_CASE(x) \ |
| 88 | return SwizzleImpl<TO_LINEAR, 1>(output, input, width, height, depth, block_height, | 88 | case x: \ |
| 89 | return SwizzleImpl<TO_LINEAR, x>(output, input, width, height, depth, block_height, \ | ||
| 89 | block_depth, stride_alignment); | 90 | block_depth, stride_alignment); |
| 90 | case 2: | 91 | BPP_CASE(1) |
| 91 | return SwizzleImpl<TO_LINEAR, 2>(output, input, width, height, depth, block_height, | 92 | BPP_CASE(2) |
| 92 | block_depth, stride_alignment); | 93 | BPP_CASE(3) |
| 93 | case 3: | 94 | BPP_CASE(4) |
| 94 | return SwizzleImpl<TO_LINEAR, 3>(output, input, width, height, depth, block_height, | 95 | BPP_CASE(6) |
| 95 | block_depth, stride_alignment); | 96 | BPP_CASE(8) |
| 96 | case 4: | 97 | BPP_CASE(12) |
| 97 | return SwizzleImpl<TO_LINEAR, 4>(output, input, width, height, depth, block_height, | 98 | BPP_CASE(16) |
| 98 | block_depth, stride_alignment); | 99 | #undef BPP_CASE |
| 99 | case 6: | ||
| 100 | return SwizzleImpl<TO_LINEAR, 6>(output, input, width, height, depth, block_height, | ||
| 101 | block_depth, stride_alignment); | ||
| 102 | case 8: | ||
| 103 | return SwizzleImpl<TO_LINEAR, 8>(output, input, width, height, depth, block_height, | ||
| 104 | block_depth, stride_alignment); | ||
| 105 | case 12: | ||
| 106 | return SwizzleImpl<TO_LINEAR, 12>(output, input, width, height, depth, block_height, | ||
| 107 | block_depth, stride_alignment); | ||
| 108 | case 16: | ||
| 109 | return SwizzleImpl<TO_LINEAR, 16>(output, input, width, height, depth, block_height, | ||
| 110 | block_depth, stride_alignment); | ||
| 111 | default: | 100 | default: |
| 112 | UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); | 101 | UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); |
| 113 | } | 102 | } |
| 114 | } | 103 | } |
| 115 | } // Anonymous namespace | ||
| 116 | |||
| 117 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, | ||
| 118 | u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, | ||
| 119 | u32 stride_alignment) { | ||
| 120 | Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, | ||
| 121 | stride_alignment); | ||
| 122 | } | ||
| 123 | |||
| 124 | void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, | ||
| 125 | u32 height, u32 depth, u32 block_height, u32 block_depth, | ||
| 126 | u32 stride_alignment) { | ||
| 127 | Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, | ||
| 128 | stride_alignment); | ||
| 129 | } | ||
| 130 | 104 | ||
| 105 | template <u32 BYTES_PER_PIXEL> | ||
| 131 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 106 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 132 | u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, | 107 | u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit, |
| 133 | u32 block_height_bit, u32 offset_x, u32 offset_y) { | 108 | u32 offset_x, u32 offset_y) { |
| 134 | const u32 block_height = 1U << block_height_bit; | 109 | const u32 block_height = 1U << block_height_bit; |
| 135 | const u32 image_width_in_gobs = | 110 | const u32 image_width_in_gobs = |
| 136 | (swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X; | 111 | (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X; |
| 137 | for (u32 line = 0; line < subrect_height; ++line) { | 112 | for (u32 line = 0; line < subrect_height; ++line) { |
| 138 | const u32 dst_y = line + offset_y; | 113 | const u32 dst_y = line + offset_y; |
| 139 | const u32 gob_address_y = | 114 | const u32 gob_address_y = |
| @@ -143,20 +118,21 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 143 | for (u32 x = 0; x < subrect_width; ++x) { | 118 | for (u32 x = 0; x < subrect_width; ++x) { |
| 144 | const u32 dst_x = x + offset_x; | 119 | const u32 dst_x = x + offset_x; |
| 145 | const u32 gob_address = | 120 | const u32 gob_address = |
| 146 | gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height; | 121 | gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height; |
| 147 | const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X]; | 122 | const u32 swizzled_offset = gob_address + table[(dst_x * BYTES_PER_PIXEL) % GOB_SIZE_X]; |
| 148 | const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel; | 123 | const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL; |
| 149 | 124 | ||
| 150 | const u8* const source_line = unswizzled_data + unswizzled_offset; | 125 | const u8* const source_line = unswizzled_data + unswizzled_offset; |
| 151 | u8* const dest_addr = swizzled_data + swizzled_offset; | 126 | u8* const dest_addr = swizzled_data + swizzled_offset; |
| 152 | std::memcpy(dest_addr, source_line, bytes_per_pixel); | 127 | std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL); |
| 153 | } | 128 | } |
| 154 | } | 129 | } |
| 155 | } | 130 | } |
| 156 | 131 | ||
| 157 | void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, | 132 | template <u32 BYTES_PER_PIXEL> |
| 158 | u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { | 133 | void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height, |
| 159 | const u32 stride = width * bytes_per_pixel; | 134 | u32 origin_x, u32 origin_y, u8* output, const u8* input) { |
| 135 | const u32 stride = width * BYTES_PER_PIXEL; | ||
| 160 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; | 136 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; |
| 161 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); | 137 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); |
| 162 | 138 | ||
| @@ -171,24 +147,25 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, | |||
| 171 | const u32 src_offset_y = (block_y >> block_height) * block_size + | 147 | const u32 src_offset_y = (block_y >> block_height) * block_size + |
| 172 | ((block_y & block_height_mask) << GOB_SIZE_SHIFT); | 148 | ((block_y & block_height_mask) << GOB_SIZE_SHIFT); |
| 173 | for (u32 column = 0; column < line_length_in; ++column) { | 149 | for (u32 column = 0; column < line_length_in; ++column) { |
| 174 | const u32 src_x = (column + origin_x) * bytes_per_pixel; | 150 | const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL; |
| 175 | const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift; | 151 | const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift; |
| 176 | 152 | ||
| 177 | const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X]; | 153 | const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X]; |
| 178 | const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel; | 154 | const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL; |
| 179 | 155 | ||
| 180 | std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel); | 156 | std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL); |
| 181 | } | 157 | } |
| 182 | } | 158 | } |
| 183 | } | 159 | } |
| 184 | 160 | ||
| 161 | template <u32 BYTES_PER_PIXEL> | ||
| 185 | void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, | 162 | void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, |
| 186 | u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, | 163 | u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output, |
| 187 | u32 origin_y, u8* output, const u8* input) { | 164 | const u8* input) { |
| 188 | UNIMPLEMENTED_IF(origin_x > 0); | 165 | UNIMPLEMENTED_IF(origin_x > 0); |
| 189 | UNIMPLEMENTED_IF(origin_y > 0); | 166 | UNIMPLEMENTED_IF(origin_y > 0); |
| 190 | 167 | ||
| 191 | const u32 stride = width * bytes_per_pixel; | 168 | const u32 stride = width * BYTES_PER_PIXEL; |
| 192 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; | 169 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; |
| 193 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); | 170 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); |
| 194 | 171 | ||
| @@ -203,11 +180,93 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt | |||
| 203 | for (u32 x = 0; x < line_length_in; ++x) { | 180 | for (u32 x = 0; x < line_length_in; ++x) { |
| 204 | const u32 dst_offset = | 181 | const u32 dst_offset = |
| 205 | ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X]; | 182 | ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X]; |
| 206 | const u32 src_offset = x * bytes_per_pixel + line * pitch; | 183 | const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch; |
| 207 | std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel); | 184 | std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL); |
| 208 | } | 185 | } |
| 209 | } | 186 | } |
| 210 | } | 187 | } |
| 188 | } // Anonymous namespace | ||
| 189 | |||
| 190 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, | ||
| 191 | u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, | ||
| 192 | u32 stride_alignment) { | ||
| 193 | Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, | ||
| 194 | stride_alignment); | ||
| 195 | } | ||
| 196 | |||
| 197 | void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, | ||
| 198 | u32 height, u32 depth, u32 block_height, u32 block_depth, | ||
| 199 | u32 stride_alignment) { | ||
| 200 | Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, | ||
| 201 | stride_alignment); | ||
| 202 | } | ||
| 203 | |||
| 204 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | ||
| 205 | u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, | ||
| 206 | u32 block_height_bit, u32 offset_x, u32 offset_y) { | ||
| 207 | switch (bytes_per_pixel) { | ||
| 208 | #define BPP_CASE(x) \ | ||
| 209 | case x: \ | ||
| 210 | return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width, \ | ||
| 211 | swizzled_data, unswizzled_data, block_height_bit, offset_x, \ | ||
| 212 | offset_y); | ||
| 213 | BPP_CASE(1) | ||
| 214 | BPP_CASE(2) | ||
| 215 | BPP_CASE(3) | ||
| 216 | BPP_CASE(4) | ||
| 217 | BPP_CASE(6) | ||
| 218 | BPP_CASE(8) | ||
| 219 | BPP_CASE(12) | ||
| 220 | BPP_CASE(16) | ||
| 221 | #undef BPP_CASE | ||
| 222 | default: | ||
| 223 | UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); | ||
| 224 | } | ||
| 225 | } | ||
| 226 | |||
| 227 | void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, | ||
| 228 | u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { | ||
| 229 | switch (bytes_per_pixel) { | ||
| 230 | #define BPP_CASE(x) \ | ||
| 231 | case x: \ | ||
| 232 | return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height, \ | ||
| 233 | origin_x, origin_y, output, input); | ||
| 234 | BPP_CASE(1) | ||
| 235 | BPP_CASE(2) | ||
| 236 | BPP_CASE(3) | ||
| 237 | BPP_CASE(4) | ||
| 238 | BPP_CASE(6) | ||
| 239 | BPP_CASE(8) | ||
| 240 | BPP_CASE(12) | ||
| 241 | BPP_CASE(16) | ||
| 242 | #undef BPP_CASE | ||
| 243 | default: | ||
| 244 | UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); | ||
| 245 | } | ||
| 246 | } | ||
| 247 | |||
| 248 | void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, | ||
| 249 | u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, | ||
| 250 | u32 origin_y, u8* output, const u8* input) { | ||
| 251 | switch (bytes_per_pixel) { | ||
| 252 | #define BPP_CASE(x) \ | ||
| 253 | case x: \ | ||
| 254 | return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height, \ | ||
| 255 | block_height, block_depth, origin_x, origin_y, output, \ | ||
| 256 | input); | ||
| 257 | BPP_CASE(1) | ||
| 258 | BPP_CASE(2) | ||
| 259 | BPP_CASE(3) | ||
| 260 | BPP_CASE(4) | ||
| 261 | BPP_CASE(6) | ||
| 262 | BPP_CASE(8) | ||
| 263 | BPP_CASE(12) | ||
| 264 | BPP_CASE(16) | ||
| 265 | #undef BPP_CASE | ||
| 266 | default: | ||
| 267 | UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); | ||
| 268 | } | ||
| 269 | } | ||
| 211 | 270 | ||
| 212 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | 271 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, |
| 213 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, | 272 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, |
| @@ -228,7 +287,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 | |||
| 228 | u8* dest_addr = swizzle_data + swizzled_offset; | 287 | u8* dest_addr = swizzle_data + swizzled_offset; |
| 229 | count++; | 288 | count++; |
| 230 | 289 | ||
| 231 | std::memcpy(dest_addr, source_line, 1); | 290 | *dest_addr = *source_line; |
| 232 | } | 291 | } |
| 233 | } | 292 | } |
| 234 | } | 293 | } |