diff options
| author | 2022-08-14 02:36:36 -0700 | |
|---|---|---|
| committer | 2022-10-06 21:00:53 +0200 | |
| commit | f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6 (patch) | |
| tree | 5156a04816d6556b8babe7d69301f18098b8dd1d /src/video_core/textures/decoders.cpp | |
| parent | Maxwell3D: Add small_index_2 (diff) | |
| download | yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.tar.gz yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.tar.xz yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.zip | |
DMA & InlineToMemory Engines Rework.
Diffstat (limited to 'src/video_core/textures/decoders.cpp')
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 225 |
1 files changed, 75 insertions, 150 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 913f8ebcb..fcc636e0b 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -89,6 +89,69 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 | |||
| 89 | } | 89 | } |
| 90 | } | 90 | } |
| 91 | 91 | ||
| 92 | template <bool TO_LINEAR, u32 BYTES_PER_PIXEL> | ||
| 93 | void SwizzleSubrectImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, | ||
| 94 | u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 num_lines, | ||
| 95 | u32 block_height, u32 block_depth, u32 pitch_linear) { | ||
| 96 | // The Z origin of the transformation could be configured here; it is left at zero because the | ||
| 97 | // current API doesn't expose it. | ||
| 98 | static constexpr u32 origin_z = 0; | ||
| 99 | |||
| 100 | // Pitch of the linear (unswizzled) buffer in bytes, supplied by the caller through pitch_linear. | ||
| 101 | // It is used as the row stride when computing unswizzled offsets. | ||
| 102 | const u32 pitch = pitch_linear; | ||
| 103 | const u32 stride = Common::AlignUpLog2(width * BYTES_PER_PIXEL, GOB_SIZE_X_SHIFT); | ||
| 104 | |||
| 105 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); | ||
| 106 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 107 | const u32 slice_size = | ||
| 108 | Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size; | ||
| 109 | |||
| 110 | const u32 block_height_mask = (1U << block_height) - 1; | ||
| 111 | const u32 block_depth_mask = (1U << block_depth) - 1; | ||
| 112 | const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth; | ||
| 113 | |||
| 114 | u32 unprocessed_lines = num_lines; | ||
| 115 | u32 extent_y = std::min(num_lines, height - origin_y); | ||
| 116 | |||
| 117 | for (u32 slice = 0; slice < depth; ++slice) { | ||
| 118 | const u32 z = slice + origin_z; | ||
| 119 | const u32 offset_z = (z >> block_depth) * slice_size + | ||
| 120 | ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); | ||
| 121 | const u32 lines_in_y = std::min(unprocessed_lines, extent_y); | ||
| 122 | for (u32 line = 0; line < lines_in_y; ++line) { | ||
| 123 | const u32 y = line + origin_y; | ||
| 124 | const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y); | ||
| 125 | |||
| 126 | const u32 block_y = y >> GOB_SIZE_Y_SHIFT; | ||
| 127 | const u32 offset_y = (block_y >> block_height) * block_size + | ||
| 128 | ((block_y & block_height_mask) << GOB_SIZE_SHIFT); | ||
| 129 | |||
| 130 | u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL); | ||
| 131 | for (u32 column = 0; column < extent_x; | ||
| 132 | ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) { | ||
| 133 | const u32 x = (column + origin_x) * BYTES_PER_PIXEL; | ||
| 134 | const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; | ||
| 135 | |||
| 136 | const u32 base_swizzled_offset = offset_z + offset_y + offset_x; | ||
| 137 | const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y); | ||
| 138 | |||
| 139 | const u32 unswizzled_offset = | ||
| 140 | slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL; | ||
| 141 | |||
| 142 | u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; | ||
| 143 | const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; | ||
| 144 | |||
| 145 | std::memcpy(dst, src, BYTES_PER_PIXEL); | ||
| 146 | } | ||
| 147 | } | ||
| 148 | unprocessed_lines -= lines_in_y; | ||
| 149 | if (unprocessed_lines == 0) { | ||
| 150 | return; | ||
| 151 | } | ||
| 152 | } | ||
| 153 | } | ||
| 154 | |||
| 92 | template <bool TO_LINEAR> | 155 | template <bool TO_LINEAR> |
| 93 | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, | 156 | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 94 | u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { | 157 | u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { |
| @@ -111,97 +174,6 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe | |||
| 111 | } | 174 | } |
| 112 | } | 175 | } |
| 113 | 176 | ||
| 114 | template <u32 BYTES_PER_PIXEL> | ||
| 115 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | ||
| 116 | u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit, | ||
| 117 | u32 offset_x, u32 offset_y) { | ||
| 118 | const u32 block_height = 1U << block_height_bit; | ||
| 119 | const u32 image_width_in_gobs = | ||
| 120 | (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X; | ||
| 121 | for (u32 line = 0; line < subrect_height; ++line) { | ||
| 122 | const u32 dst_y = line + offset_y; | ||
| 123 | const u32 gob_address_y = | ||
| 124 | (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + | ||
| 125 | ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; | ||
| 126 | |||
| 127 | const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(dst_y); | ||
| 128 | u32 swizzled_x = pdep<SWIZZLE_X_BITS>(offset_x * BYTES_PER_PIXEL); | ||
| 129 | for (u32 x = 0; x < subrect_width; | ||
| 130 | ++x, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) { | ||
| 131 | const u32 dst_x = x + offset_x; | ||
| 132 | const u32 gob_address = | ||
| 133 | gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height; | ||
| 134 | const u32 swizzled_offset = gob_address + (swizzled_x | swizzled_y); | ||
| 135 | const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL; | ||
| 136 | |||
| 137 | const u8* const source_line = unswizzled_data + unswizzled_offset; | ||
| 138 | u8* const dest_addr = swizzled_data + swizzled_offset; | ||
| 139 | std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | template <u32 BYTES_PER_PIXEL> | ||
| 145 | void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height, | ||
| 146 | u32 origin_x, u32 origin_y, u8* output, const u8* input) { | ||
| 147 | const u32 stride = width * BYTES_PER_PIXEL; | ||
| 148 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; | ||
| 149 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); | ||
| 150 | |||
| 151 | const u32 block_height_mask = (1U << block_height) - 1; | ||
| 152 | const u32 x_shift = GOB_SIZE_SHIFT + block_height; | ||
| 153 | |||
| 154 | for (u32 line = 0; line < line_count; ++line) { | ||
| 155 | const u32 src_y = line + origin_y; | ||
| 156 | const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(src_y); | ||
| 157 | |||
| 158 | const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; | ||
| 159 | const u32 src_offset_y = (block_y >> block_height) * block_size + | ||
| 160 | ((block_y & block_height_mask) << GOB_SIZE_SHIFT); | ||
| 161 | |||
| 162 | u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL); | ||
| 163 | for (u32 column = 0; column < line_length_in; | ||
| 164 | ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) { | ||
| 165 | const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL; | ||
| 166 | const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift; | ||
| 167 | |||
| 168 | const u32 swizzled_offset = src_offset_y + src_offset_x + (swizzled_x | swizzled_y); | ||
| 169 | const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL; | ||
| 170 | |||
| 171 | std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL); | ||
| 172 | } | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | template <u32 BYTES_PER_PIXEL> | ||
| 177 | void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, | ||
| 178 | u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output, | ||
| 179 | const u8* input) { | ||
| 180 | UNIMPLEMENTED_IF(origin_x > 0); | ||
| 181 | UNIMPLEMENTED_IF(origin_y > 0); | ||
| 182 | |||
| 183 | const u32 stride = width * BYTES_PER_PIXEL; | ||
| 184 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; | ||
| 185 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 186 | |||
| 187 | const u32 block_height_mask = (1U << block_height) - 1; | ||
| 188 | const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; | ||
| 189 | |||
| 190 | for (u32 line = 0; line < line_count; ++line) { | ||
| 191 | const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(line); | ||
| 192 | const u32 block_y = line / GOB_SIZE_Y; | ||
| 193 | const u32 dst_offset_y = | ||
| 194 | (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; | ||
| 195 | |||
| 196 | u32 swizzled_x = 0; | ||
| 197 | for (u32 x = 0; x < line_length_in; ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) { | ||
| 198 | const u32 dst_offset = | ||
| 199 | ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + (swizzled_x | swizzled_y); | ||
| 200 | const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch; | ||
| 201 | std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | } | ||
| 205 | } // Anonymous namespace | 177 | } // Anonymous namespace |
| 206 | 178 | ||
| 207 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, | 179 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| @@ -218,15 +190,15 @@ void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_p | |||
| 218 | stride_alignment); | 190 | stride_alignment); |
| 219 | } | 191 | } |
| 220 | 192 | ||
| 221 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 193 | void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 222 | u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, | 194 | u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y, |
| 223 | u32 block_height_bit, u32 offset_x, u32 offset_y) { | 195 | u32 block_height, u32 block_depth, u32 pitch_linear) { |
| 224 | switch (bytes_per_pixel) { | 196 | switch (bytes_per_pixel) { |
| 225 | #define BPP_CASE(x) \ | 197 | #define BPP_CASE(x) \ |
| 226 | case x: \ | 198 | case x: \ |
| 227 | return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width, \ | 199 | return SwizzleSubrectImpl<true, x>(output, input, width, height, depth, origin_x, \ |
| 228 | swizzled_data, unswizzled_data, block_height_bit, offset_x, \ | 200 | origin_y, extent_x, extent_y, block_height, \ |
| 229 | offset_y); | 201 | block_depth, pitch_linear); |
| 230 | BPP_CASE(1) | 202 | BPP_CASE(1) |
| 231 | BPP_CASE(2) | 203 | BPP_CASE(2) |
| 232 | BPP_CASE(3) | 204 | BPP_CASE(3) |
| @@ -241,13 +213,15 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 241 | } | 213 | } |
| 242 | } | 214 | } |
| 243 | 215 | ||
| 244 | void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, | 216 | void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| 245 | u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { | 217 | u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, |
| 218 | u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear) { | ||
| 246 | switch (bytes_per_pixel) { | 219 | switch (bytes_per_pixel) { |
| 247 | #define BPP_CASE(x) \ | 220 | #define BPP_CASE(x) \ |
| 248 | case x: \ | 221 | case x: \ |
| 249 | return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height, \ | 222 | return SwizzleSubrectImpl<false, x>(output, input, width, height, depth, origin_x, \ |
| 250 | origin_x, origin_y, output, input); | 223 | origin_y, extent_x, extent_y, block_height, \ |
| 224 | block_depth, pitch_linear); | ||
| 251 | BPP_CASE(1) | 225 | BPP_CASE(1) |
| 252 | BPP_CASE(2) | 226 | BPP_CASE(2) |
| 253 | BPP_CASE(3) | 227 | BPP_CASE(3) |
| @@ -262,55 +236,6 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, | |||
| 262 | } | 236 | } |
| 263 | } | 237 | } |
| 264 | 238 | ||
| 265 | void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, | ||
| 266 | u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, | ||
| 267 | u32 origin_y, u8* output, const u8* input) { | ||
| 268 | switch (bytes_per_pixel) { | ||
| 269 | #define BPP_CASE(x) \ | ||
| 270 | case x: \ | ||
| 271 | return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height, \ | ||
| 272 | block_height, block_depth, origin_x, origin_y, output, \ | ||
| 273 | input); | ||
| 274 | BPP_CASE(1) | ||
| 275 | BPP_CASE(2) | ||
| 276 | BPP_CASE(3) | ||
| 277 | BPP_CASE(4) | ||
| 278 | BPP_CASE(6) | ||
| 279 | BPP_CASE(8) | ||
| 280 | BPP_CASE(12) | ||
| 281 | BPP_CASE(16) | ||
| 282 | #undef BPP_CASE | ||
| 283 | default: | ||
| 284 | ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel); | ||
| 285 | } | ||
| 286 | } | ||
| 287 | |||
| 288 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | ||
| 289 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, | ||
| 290 | u8* swizzle_data) { | ||
| 291 | const u32 block_height = 1U << block_height_bit; | ||
| 292 | const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X}; | ||
| 293 | std::size_t count = 0; | ||
| 294 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | ||
| 295 | const std::size_t gob_address_y = | ||
| 296 | (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + | ||
| 297 | ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; | ||
| 298 | const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(static_cast<u32>(y)); | ||
| 299 | u32 swizzled_x = pdep<SWIZZLE_X_BITS>(dst_x); | ||
| 300 | for (std::size_t x = dst_x; x < width && count < copy_size; | ||
| 301 | ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) { | ||
| 302 | const std::size_t gob_address = | ||
| 303 | gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; | ||
| 304 | const std::size_t swizzled_offset = gob_address + (swizzled_x | swizzled_y); | ||
| 305 | const u8* source_line = source_data + count; | ||
| 306 | u8* dest_addr = swizzle_data + swizzled_offset; | ||
| 307 | count++; | ||
| 308 | |||
| 309 | *dest_addr = *source_line; | ||
| 310 | } | ||
| 311 | } | ||
| 312 | } | ||
| 313 | |||
| 314 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 239 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 315 | u32 block_height, u32 block_depth) { | 240 | u32 block_height, u32 block_depth) { |
| 316 | if (tiled) { | 241 | if (tiled) { |