diff options
Diffstat (limited to 'src/video_core/textures/decoders.cpp')
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 42 |
1 files changed, 27 insertions, 15 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 217805386..7e8295944 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -36,10 +36,16 @@ struct alignas(64) SwizzleTable { | |||
| 36 | std::array<std::array<u16, M>, N> values{}; | 36 | std::array<std::array<u16, M>, N> values{}; |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | constexpr u32 gob_size_x = 64; | 39 | constexpr u32 gob_size_x_shift = 6; |
| 40 | constexpr u32 gob_size_y = 8; | 40 | constexpr u32 gob_size_y_shift = 3; |
| 41 | constexpr u32 gob_size_z = 1; | 41 | constexpr u32 gob_size_z_shift = 0; |
| 42 | constexpr u32 gob_size = gob_size_x * gob_size_y * gob_size_z; | 42 | constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; |
| 43 | |||
| 44 | constexpr u32 gob_size_x = 1U << gob_size_x_shift; | ||
| 45 | constexpr u32 gob_size_y = 1U << gob_size_y_shift; | ||
| 46 | constexpr u32 gob_size_z = 1U << gob_size_z_shift; | ||
| 47 | constexpr u32 gob_size = 1U << gob_size_shift; | ||
| 48 | |||
| 43 | constexpr u32 fast_swizzle_align = 16; | 49 | constexpr u32 fast_swizzle_align = 16; |
| 44 | 50 | ||
| 45 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); | 51 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); |
| @@ -171,14 +177,16 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 171 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | 177 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, |
| 172 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, | 178 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, |
| 173 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { | 179 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { |
| 180 | const u32 block_height_size{1U << block_height}; | ||
| 181 | const u32 block_depth_size{1U << block_depth}; | ||
| 174 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { | 182 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { |
| 175 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 183 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 176 | bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, | 184 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 177 | width_spacing); | 185 | block_depth_size, width_spacing); |
| 178 | } else { | 186 | } else { |
| 179 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 187 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 180 | bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, | 188 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 181 | width_spacing); | 189 | block_depth_size, width_spacing); |
| 182 | } | 190 | } |
| 183 | } | 191 | } |
| 184 | 192 | ||
| @@ -248,7 +256,9 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, | |||
| 248 | } | 256 | } |
| 249 | 257 | ||
| 250 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 258 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 251 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { | 259 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, |
| 260 | u32 block_height_bit) { | ||
| 261 | const u32 block_height = 1U << block_height_bit; | ||
| 252 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | 262 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / |
| 253 | gob_size_x}; | 263 | gob_size_x}; |
| 254 | for (u32 line = 0; line < subrect_height; ++line) { | 264 | for (u32 line = 0; line < subrect_height; ++line) { |
| @@ -269,8 +279,9 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 269 | } | 279 | } |
| 270 | 280 | ||
| 271 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 281 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 272 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 282 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, |
| 273 | u32 offset_x, u32 offset_y) { | 283 | u32 block_height_bit, u32 offset_x, u32 offset_y) { |
| 284 | const u32 block_height = 1U << block_height_bit; | ||
| 274 | for (u32 line = 0; line < subrect_height; ++line) { | 285 | for (u32 line = 0; line < subrect_height; ++line) { |
| 275 | const u32 y2 = line + offset_y; | 286 | const u32 y2 = line + offset_y; |
| 276 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + | 287 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + |
| @@ -289,8 +300,9 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 289 | } | 300 | } |
| 290 | 301 | ||
| 291 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | 302 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, |
| 292 | const u32 block_height, const std::size_t copy_size, const u8* source_data, | 303 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, |
| 293 | u8* swizzle_data) { | 304 | u8* swizzle_data) { |
| 305 | const u32 block_height = 1U << block_height_bit; | ||
| 294 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | 306 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; |
| 295 | std::size_t count = 0; | 307 | std::size_t count = 0; |
| 296 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | 308 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { |
| @@ -356,9 +368,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat | |||
| 356 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 368 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 357 | u32 block_height, u32 block_depth) { | 369 | u32 block_height, u32 block_depth) { |
| 358 | if (tiled) { | 370 | if (tiled) { |
| 359 | const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x); | 371 | const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); |
| 360 | const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height); | 372 | const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); |
| 361 | const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth); | 373 | const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); |
| 362 | return aligned_width * aligned_height * aligned_depth; | 374 | return aligned_width * aligned_height * aligned_depth; |
| 363 | } else { | 375 | } else { |
| 364 | return width * height * depth * bytes_per_pixel; | 376 | return width * height * depth * bytes_per_pixel; |