diff options
| author | 2019-05-10 04:17:48 -0300 | |
|---|---|---|
| committer | 2019-06-20 21:36:12 -0300 | |
| commit | 345e73f2feb0701e3c3099d002a1c21fb524eae4 (patch) | |
| tree | c8c934dfec804d04a29f8ee27124274f5f999fb8 /src/video_core/textures/decoders.cpp | |
| parent | texture_cache: Change internal cache from lists to vectors (diff) | |
| download | yuzu-345e73f2feb0701e3c3099d002a1c21fb524eae4.tar.gz yuzu-345e73f2feb0701e3c3099d002a1c21fb524eae4.tar.xz yuzu-345e73f2feb0701e3c3099d002a1c21fb524eae4.zip | |
video_core: Use un-shifted block sizes to avoid integer divisions
Instead of storing all block widths, heights and depths in their shifted
form:
block_width = 1U << block_shift;
store them as they are provided by the emulated hardware (their
block_shift form). This way we can avoid the costly
Common::AlignUp operation when aligning texture sizes and replace CPU integer
divisions with bitwise logic (defined in Common::AlignBits).
Diffstat (limited to 'src/video_core/textures/decoders.cpp')
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 55 |
1 files changed, 35 insertions, 20 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 217805386..f45fd175a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -36,10 +36,16 @@ struct alignas(64) SwizzleTable { | |||
| 36 | std::array<std::array<u16, M>, N> values{}; | 36 | std::array<std::array<u16, M>, N> values{}; |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | constexpr u32 gob_size_x = 64; | 39 | constexpr u32 gob_size_x_shift = 6; |
| 40 | constexpr u32 gob_size_y = 8; | 40 | constexpr u32 gob_size_y_shift = 3; |
| 41 | constexpr u32 gob_size_z = 1; | 41 | constexpr u32 gob_size_z_shift = 0; |
| 42 | constexpr u32 gob_size = gob_size_x * gob_size_y * gob_size_z; | 42 | constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; |
| 43 | |||
| 44 | constexpr u32 gob_size_x = 1U << gob_size_x_shift; | ||
| 45 | constexpr u32 gob_size_y = 1U << gob_size_y_shift; | ||
| 46 | constexpr u32 gob_size_z = 1U << gob_size_z_shift; | ||
| 47 | constexpr u32 gob_size = 1U << gob_size_shift; | ||
| 48 | |||
| 43 | constexpr u32 fast_swizzle_align = 16; | 49 | constexpr u32 fast_swizzle_align = 16; |
| 44 | 50 | ||
| 45 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); | 51 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); |
| @@ -171,14 +177,16 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 171 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | 177 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, |
| 172 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, | 178 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, |
| 173 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { | 179 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { |
| 180 | const u32 block_height_size{1U << block_height}; | ||
| 181 | const u32 block_depth_size{1U << block_depth}; | ||
| 174 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { | 182 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { |
| 175 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 183 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 176 | bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, | 184 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 177 | width_spacing); | 185 | block_depth_size, width_spacing); |
| 178 | } else { | 186 | } else { |
| 179 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 187 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 180 | bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, | 188 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 181 | width_spacing); | 189 | block_depth_size, width_spacing); |
| 182 | } | 190 | } |
| 183 | } | 191 | } |
| 184 | 192 | ||
| @@ -249,16 +257,18 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, | |||
| 249 | 257 | ||
| 250 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 258 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 251 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { | 259 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { |
| 260 | const u32 block_height_size{1U << block_height}; | ||
| 252 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | 261 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / |
| 253 | gob_size_x}; | 262 | gob_size_x}; |
| 254 | for (u32 line = 0; line < subrect_height; ++line) { | 263 | for (u32 line = 0; line < subrect_height; ++line) { |
| 255 | const u32 gob_address_y = | 264 | const u32 gob_address_y = |
| 256 | (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | 265 | (line / (gob_size_y * block_height_size)) * gob_size * block_height_size * |
| 257 | ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 266 | image_width_in_gobs + |
| 267 | ((line % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; | ||
| 258 | const auto& table = legacy_swizzle_table[line % gob_size_y]; | 268 | const auto& table = legacy_swizzle_table[line % gob_size_y]; |
| 259 | for (u32 x = 0; x < subrect_width; ++x) { | 269 | for (u32 x = 0; x < subrect_width; ++x) { |
| 260 | const u32 gob_address = | 270 | const u32 gob_address = |
| 261 | gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; | 271 | gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height_size; |
| 262 | const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; | 272 | const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; |
| 263 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; | 273 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; |
| 264 | u8* dest_addr = swizzled_data + swizzled_offset; | 274 | u8* dest_addr = swizzled_data + swizzled_offset; |
| @@ -271,14 +281,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 271 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 281 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 272 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 282 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 273 | u32 offset_x, u32 offset_y) { | 283 | u32 offset_x, u32 offset_y) { |
| 284 | const u32 block_height_size{1U << block_height}; | ||
| 274 | for (u32 line = 0; line < subrect_height; ++line) { | 285 | for (u32 line = 0; line < subrect_height; ++line) { |
| 275 | const u32 y2 = line + offset_y; | 286 | const u32 y2 = line + offset_y; |
| 276 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + | 287 | const u32 gob_address_y = |
| 277 | ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 288 | (y2 / (gob_size_y * block_height_size)) * gob_size * block_height_size + |
| 289 | ((y2 % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; | ||
| 278 | const auto& table = legacy_swizzle_table[y2 % gob_size_y]; | 290 | const auto& table = legacy_swizzle_table[y2 % gob_size_y]; |
| 279 | for (u32 x = 0; x < subrect_width; ++x) { | 291 | for (u32 x = 0; x < subrect_width; ++x) { |
| 280 | const u32 x2 = (x + offset_x) * bytes_per_pixel; | 292 | const u32 x2 = (x + offset_x) * bytes_per_pixel; |
| 281 | const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; | 293 | const u32 gob_address = |
| 294 | gob_address_y + (x2 / gob_size_x) * gob_size * block_height_size; | ||
| 282 | const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; | 295 | const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; |
| 283 | u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; | 296 | u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; |
| 284 | u8* source_addr = swizzled_data + swizzled_offset; | 297 | u8* source_addr = swizzled_data + swizzled_offset; |
| @@ -291,16 +304,18 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 291 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | 304 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, |
| 292 | const u32 block_height, const std::size_t copy_size, const u8* source_data, | 305 | const u32 block_height, const std::size_t copy_size, const u8* source_data, |
| 293 | u8* swizzle_data) { | 306 | u8* swizzle_data) { |
| 307 | const u32 block_height_size{1U << block_height}; | ||
| 294 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | 308 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; |
| 295 | std::size_t count = 0; | 309 | std::size_t count = 0; |
| 296 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | 310 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { |
| 297 | const std::size_t gob_address_y = | 311 | const std::size_t gob_address_y = |
| 298 | (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | 312 | (y / (gob_size_y * block_height_size)) * gob_size * block_height_size * |
| 299 | ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 313 | image_width_in_gobs + |
| 314 | ((y % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; | ||
| 300 | const auto& table = legacy_swizzle_table[y % gob_size_y]; | 315 | const auto& table = legacy_swizzle_table[y % gob_size_y]; |
| 301 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { | 316 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { |
| 302 | const std::size_t gob_address = | 317 | const std::size_t gob_address = |
| 303 | gob_address_y + (x / gob_size_x) * gob_size * block_height; | 318 | gob_address_y + (x / gob_size_x) * gob_size * block_height_size; |
| 304 | const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; | 319 | const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; |
| 305 | const u8* source_line = source_data + count; | 320 | const u8* source_line = source_data + count; |
| 306 | u8* dest_addr = swizzle_data + swizzled_offset; | 321 | u8* dest_addr = swizzle_data + swizzled_offset; |
| @@ -356,9 +371,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat | |||
| 356 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 371 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 357 | u32 block_height, u32 block_depth) { | 372 | u32 block_height, u32 block_depth) { |
| 358 | if (tiled) { | 373 | if (tiled) { |
| 359 | const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x); | 374 | const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); |
| 360 | const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height); | 375 | const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); |
| 361 | const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth); | 376 | const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); |
| 362 | return aligned_width * aligned_height * aligned_depth; | 377 | return aligned_width * aligned_height * aligned_depth; |
| 363 | } else { | 378 | } else { |
| 364 | return width * height * depth * bytes_per_pixel; | 379 | return width * height * depth * bytes_per_pixel; |