diff options
| author | 2020-07-04 18:42:10 -0300 | |
|---|---|---|
| committer | 2020-07-10 04:09:32 -0300 | |
| commit | c574ab5aa1d3ff81b28ddfbba3818b3ce724aa32 (patch) | |
| tree | bfd4107fc64c09c767be6bfe79c4904075ce0307 /src/video_core/textures/decoders.cpp | |
| parent | maxwell_dma: Rename registers to match official docs and reorder (diff) | |
| download | yuzu-c574ab5aa1d3ff81b28ddfbba3818b3ce724aa32.tar.gz yuzu-c574ab5aa1d3ff81b28ddfbba3818b3ce724aa32.tar.xz yuzu-c574ab5aa1d3ff81b28ddfbba3818b3ce724aa32.zip | |
video_core/textures: Add and use SwizzleSliceToVoxel, and minor style changes
Change GOB sizes from free functions to constexpr constants.
Add SwizzleSliceToVoxel, a function that swizzles a 2D array of pixels
into a 3D texture, and use it for 3D copies.
Diffstat (limited to 'src/video_core/textures/decoders.cpp')
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 134 |
1 files changed, 77 insertions, 57 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 548e4c3fe..98beabef1 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include "common/alignment.h" | 7 | #include "common/alignment.h" |
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/bit_util.h" | ||
| 9 | #include "video_core/gpu.h" | 10 | #include "video_core/gpu.h" |
| 10 | #include "video_core/textures/decoders.h" | 11 | #include "video_core/textures/decoders.h" |
| 11 | #include "video_core/textures/texture.h" | 12 | #include "video_core/textures/texture.h" |
| @@ -37,20 +38,10 @@ struct alignas(64) SwizzleTable { | |||
| 37 | std::array<std::array<u16, M>, N> values{}; | 38 | std::array<std::array<u16, M>, N> values{}; |
| 38 | }; | 39 | }; |
| 39 | 40 | ||
| 40 | constexpr u32 gob_size_x_shift = 6; | 41 | constexpr u32 FAST_SWIZZLE_ALIGN = 16; |
| 41 | constexpr u32 gob_size_y_shift = 3; | ||
| 42 | constexpr u32 gob_size_z_shift = 0; | ||
| 43 | constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; | ||
| 44 | 42 | ||
| 45 | constexpr u32 gob_size_x = 1U << gob_size_x_shift; | 43 | constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, GOB_SIZE_X, GOB_SIZE_Z>(); |
| 46 | constexpr u32 gob_size_y = 1U << gob_size_y_shift; | 44 | constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); |
| 47 | constexpr u32 gob_size_z = 1U << gob_size_z_shift; | ||
| 48 | constexpr u32 gob_size = 1U << gob_size_shift; | ||
| 49 | |||
| 50 | constexpr u32 fast_swizzle_align = 16; | ||
| 51 | |||
| 52 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); | ||
| 53 | constexpr auto fast_swizzle_table = SwizzleTable<gob_size_y, 4, fast_swizzle_align>(); | ||
| 54 | 45 | ||
| 55 | /** | 46 | /** |
| 56 | * This function manages ALL the GOBs(Group of Bytes) Inside a single block. | 47 | * This function manages ALL the GOBs(Group of Bytes) Inside a single block. |
| @@ -69,17 +60,17 @@ void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, con | |||
| 69 | u32 y_address = z_address; | 60 | u32 y_address = z_address; |
| 70 | u32 pixel_base = layer_z * z + y_start * stride_x; | 61 | u32 pixel_base = layer_z * z + y_start * stride_x; |
| 71 | for (u32 y = y_start; y < y_end; y++) { | 62 | for (u32 y = y_start; y < y_end; y++) { |
| 72 | const auto& table = legacy_swizzle_table[y % gob_size_y]; | 63 | const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 73 | for (u32 x = x_start; x < x_end; x++) { | 64 | for (u32 x = x_start; x < x_end; x++) { |
| 74 | const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % gob_size_x]}; | 65 | const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]}; |
| 75 | const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; | 66 | const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; |
| 76 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | 67 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; |
| 77 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | 68 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; |
| 78 | std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | 69 | std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); |
| 79 | } | 70 | } |
| 80 | pixel_base += stride_x; | 71 | pixel_base += stride_x; |
| 81 | if ((y + 1) % gob_size_y == 0) | 72 | if ((y + 1) % GOB_SIZE_Y == 0) |
| 82 | y_address += gob_size; | 73 | y_address += GOB_SIZE; |
| 83 | } | 74 | } |
| 84 | z_address += xy_block_size; | 75 | z_address += xy_block_size; |
| 85 | } | 76 | } |
| @@ -104,18 +95,18 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const | |||
| 104 | u32 y_address = z_address; | 95 | u32 y_address = z_address; |
| 105 | u32 pixel_base = layer_z * z + y_start * stride_x; | 96 | u32 pixel_base = layer_z * z + y_start * stride_x; |
| 106 | for (u32 y = y_start; y < y_end; y++) { | 97 | for (u32 y = y_start; y < y_end; y++) { |
| 107 | const auto& table = fast_swizzle_table[y % gob_size_y]; | 98 | const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 108 | for (u32 xb = x_startb; xb < x_endb; xb += fast_swizzle_align) { | 99 | for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) { |
| 109 | const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; | 100 | const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]}; |
| 110 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; | 101 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; |
| 111 | const u32 pixel_index{out_x + pixel_base}; | 102 | const u32 pixel_index{out_x + pixel_base}; |
| 112 | data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; | 103 | data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; |
| 113 | data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; | 104 | data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; |
| 114 | std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); | 105 | std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN); |
| 115 | } | 106 | } |
| 116 | pixel_base += stride_x; | 107 | pixel_base += stride_x; |
| 117 | if ((y + 1) % gob_size_y == 0) | 108 | if ((y + 1) % GOB_SIZE_Y == 0) |
| 118 | y_address += gob_size; | 109 | y_address += GOB_SIZE; |
| 119 | } | 110 | } |
| 120 | z_address += xy_block_size; | 111 | z_address += xy_block_size; |
| 121 | } | 112 | } |
| @@ -138,9 +129,9 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 138 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | 129 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; |
| 139 | const u32 stride_x = width * out_bytes_per_pixel; | 130 | const u32 stride_x = width * out_bytes_per_pixel; |
| 140 | const u32 layer_z = height * stride_x; | 131 | const u32 layer_z = height * stride_x; |
| 141 | const u32 gob_elements_x = gob_size_x / bytes_per_pixel; | 132 | const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; |
| 142 | constexpr u32 gob_elements_y = gob_size_y; | 133 | constexpr u32 gob_elements_y = GOB_SIZE_Y; |
| 143 | constexpr u32 gob_elements_z = gob_size_z; | 134 | constexpr u32 gob_elements_z = GOB_SIZE_Z; |
| 144 | const u32 block_x_elements = gob_elements_x; | 135 | const u32 block_x_elements = gob_elements_x; |
| 145 | const u32 block_y_elements = gob_elements_y * block_height; | 136 | const u32 block_y_elements = gob_elements_y * block_height; |
| 146 | const u32 block_z_elements = gob_elements_z * block_depth; | 137 | const u32 block_z_elements = gob_elements_z * block_depth; |
| @@ -148,7 +139,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 148 | const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); | 139 | const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); |
| 149 | const u32 blocks_on_y = div_ceil(height, block_y_elements); | 140 | const u32 blocks_on_y = div_ceil(height, block_y_elements); |
| 150 | const u32 blocks_on_z = div_ceil(depth, block_z_elements); | 141 | const u32 blocks_on_z = div_ceil(depth, block_z_elements); |
| 151 | const u32 xy_block_size = gob_size * block_height; | 142 | const u32 xy_block_size = GOB_SIZE * block_height; |
| 152 | const u32 block_size = xy_block_size * block_depth; | 143 | const u32 block_size = xy_block_size * block_depth; |
| 153 | u32 tile_offset = 0; | 144 | u32 tile_offset = 0; |
| 154 | for (u32 zb = 0; zb < blocks_on_z; zb++) { | 145 | for (u32 zb = 0; zb < blocks_on_z; zb++) { |
| @@ -182,7 +173,7 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | |||
| 182 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { | 173 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { |
| 183 | const u32 block_height_size{1U << block_height}; | 174 | const u32 block_height_size{1U << block_height}; |
| 184 | const u32 block_depth_size{1U << block_depth}; | 175 | const u32 block_depth_size{1U << block_depth}; |
| 185 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { | 176 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) { |
| 186 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 177 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 187 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, | 178 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 188 | block_depth_size, width_spacing); | 179 | block_depth_size, width_spacing); |
| @@ -259,25 +250,26 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, | |||
| 259 | } | 250 | } |
| 260 | 251 | ||
| 261 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 252 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 262 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | 253 | u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, |
| 263 | u32 block_height_bit, u32 offset_x, u32 offset_y) { | 254 | u32 block_height_bit, u32 offset_x, u32 offset_y) { |
| 264 | const u32 block_height = 1U << block_height_bit; | 255 | const u32 block_height = 1U << block_height_bit; |
| 265 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | 256 | const u32 image_width_in_gobs = |
| 266 | gob_size_x}; | 257 | (swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X; |
| 267 | for (u32 line = 0; line < subrect_height; ++line) { | 258 | for (u32 line = 0; line < subrect_height; ++line) { |
| 268 | const u32 dst_y = line + offset_y; | 259 | const u32 dst_y = line + offset_y; |
| 269 | const u32 gob_address_y = | 260 | const u32 gob_address_y = |
| 270 | (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | 261 | (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + |
| 271 | ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 262 | ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 272 | const auto& table = legacy_swizzle_table[dst_y % gob_size_y]; | 263 | const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; |
| 273 | for (u32 x = 0; x < subrect_width; ++x) { | 264 | for (u32 x = 0; x < subrect_width; ++x) { |
| 274 | const u32 dst_x = x + offset_x; | 265 | const u32 dst_x = x + offset_x; |
| 275 | const u32 gob_address = | 266 | const u32 gob_address = |
| 276 | gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height; | 267 | gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height; |
| 277 | const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x]; | 268 | const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X]; |
| 278 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; | 269 | const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel; |
| 279 | u8* dest_addr = swizzled_data + swizzled_offset; | ||
| 280 | 270 | ||
| 271 | const u8* const source_line = unswizzled_data + unswizzled_offset; | ||
| 272 | u8* const dest_addr = swizzled_data + swizzled_offset; | ||
| 281 | std::memcpy(dest_addr, source_line, bytes_per_pixel); | 273 | std::memcpy(dest_addr, source_line, bytes_per_pixel); |
| 282 | } | 274 | } |
| 283 | } | 275 | } |
| @@ -289,14 +281,15 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 289 | const u32 block_height = 1U << block_height_bit; | 281 | const u32 block_height = 1U << block_height_bit; |
| 290 | for (u32 line = 0; line < subrect_height; ++line) { | 282 | for (u32 line = 0; line < subrect_height; ++line) { |
| 291 | const u32 y2 = line + offset_y; | 283 | const u32 y2 = line + offset_y; |
| 292 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + | 284 | const u32 gob_address_y = (y2 / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height + |
| 293 | ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 285 | ((y2 % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 294 | const auto& table = legacy_swizzle_table[y2 % gob_size_y]; | 286 | const auto& table = LEGACY_SWIZZLE_TABLE[y2 % GOB_SIZE_Y]; |
| 295 | for (u32 x = 0; x < subrect_width; ++x) { | 287 | for (u32 x = 0; x < subrect_width; ++x) { |
| 296 | const u32 x2 = (x + offset_x) * bytes_per_pixel; | 288 | const u32 x2 = (x + offset_x) * bytes_per_pixel; |
| 297 | const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; | 289 | const u32 gob_address = gob_address_y + (x2 / GOB_SIZE_X) * GOB_SIZE * block_height; |
| 298 | const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; | 290 | const u32 swizzled_offset = gob_address + table[x2 % GOB_SIZE_X]; |
| 299 | u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; | 291 | const u32 unswizzled_offset = line * dest_pitch + x * bytes_per_pixel; |
| 292 | u8* dest_line = unswizzled_data + unswizzled_offset; | ||
| 300 | u8* source_addr = swizzled_data + swizzled_offset; | 293 | u8* source_addr = swizzled_data + swizzled_offset; |
| 301 | 294 | ||
| 302 | std::memcpy(dest_line, source_addr, bytes_per_pixel); | 295 | std::memcpy(dest_line, source_addr, bytes_per_pixel); |
| @@ -304,21 +297,48 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 304 | } | 297 | } |
| 305 | } | 298 | } |
| 306 | 299 | ||
| 300 | void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, | ||
| 301 | u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, | ||
| 302 | u32 origin_y, u8* output, const u8* input) { | ||
| 303 | UNIMPLEMENTED_IF(origin_x > 0); | ||
| 304 | UNIMPLEMENTED_IF(origin_y > 0); | ||
| 305 | |||
| 306 | const u32 stride = width * bytes_per_pixel; | ||
| 307 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; | ||
| 308 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 309 | |||
| 310 | const u32 block_height_mask = (1U << block_height) - 1; | ||
| 311 | const u32 x_shift = Common::CountTrailingZeroes32(GOB_SIZE << (block_height + block_depth)); | ||
| 312 | |||
| 313 | for (u32 line = 0; line < line_count; ++line) { | ||
| 314 | const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; | ||
| 315 | const u32 block_y = line / GOB_SIZE_Y; | ||
| 316 | const u32 dst_offset_y = | ||
| 317 | (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; | ||
| 318 | for (u32 x = 0; x < line_length_in; ++x) { | ||
| 319 | const u32 dst_offset = | ||
| 320 | ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X]; | ||
| 321 | const u32 src_offset = x * bytes_per_pixel + line * pitch; | ||
| 322 | std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel); | ||
| 323 | } | ||
| 324 | } | ||
| 325 | } | ||
| 326 | |||
| 307 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | 327 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, |
| 308 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, | 328 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, |
| 309 | u8* swizzle_data) { | 329 | u8* swizzle_data) { |
| 310 | const u32 block_height = 1U << block_height_bit; | 330 | const u32 block_height = 1U << block_height_bit; |
| 311 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | 331 | const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X}; |
| 312 | std::size_t count = 0; | 332 | std::size_t count = 0; |
| 313 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | 333 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { |
| 314 | const std::size_t gob_address_y = | 334 | const std::size_t gob_address_y = |
| 315 | (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | 335 | (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + |
| 316 | ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 336 | ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 317 | const auto& table = legacy_swizzle_table[y % gob_size_y]; | 337 | const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 318 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { | 338 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { |
| 319 | const std::size_t gob_address = | 339 | const std::size_t gob_address = |
| 320 | gob_address_y + (x / gob_size_x) * gob_size * block_height; | 340 | gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; |
| 321 | const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; | 341 | const std::size_t swizzled_offset = gob_address + table[x % GOB_SIZE_X]; |
| 322 | const u8* source_line = source_data + count; | 342 | const u8* source_line = source_data + count; |
| 323 | u8* dest_addr = swizzle_data + swizzled_offset; | 343 | u8* dest_addr = swizzle_data + swizzled_offset; |
| 324 | count++; | 344 | count++; |
| @@ -373,9 +393,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat | |||
| 373 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 393 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 374 | u32 block_height, u32 block_depth) { | 394 | u32 block_height, u32 block_depth) { |
| 375 | if (tiled) { | 395 | if (tiled) { |
| 376 | const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); | 396 | const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, GOB_SIZE_X_SHIFT); |
| 377 | const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); | 397 | const u32 aligned_height = Common::AlignBits(height, GOB_SIZE_Y_SHIFT + block_height); |
| 378 | const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); | 398 | const u32 aligned_depth = Common::AlignBits(depth, GOB_SIZE_Z_SHIFT + block_depth); |
| 379 | return aligned_width * aligned_height * aligned_depth; | 399 | return aligned_width * aligned_height * aligned_depth; |
| 380 | } else { | 400 | } else { |
| 381 | return width * height * depth * bytes_per_pixel; | 401 | return width * height * depth * bytes_per_pixel; |
| @@ -386,14 +406,14 @@ u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | |||
| 386 | u32 bytes_per_pixel) { | 406 | u32 bytes_per_pixel) { |
| 387 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | 407 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; |
| 388 | const u32 gobs_in_block = 1 << block_height; | 408 | const u32 gobs_in_block = 1 << block_height; |
| 389 | const u32 y_blocks = gob_size_y << block_height; | 409 | const u32 y_blocks = GOB_SIZE_Y << block_height; |
| 390 | const u32 x_per_gob = gob_size_x / bytes_per_pixel; | 410 | const u32 x_per_gob = GOB_SIZE_X / bytes_per_pixel; |
| 391 | const u32 x_blocks = div_ceil(width, x_per_gob); | 411 | const u32 x_blocks = div_ceil(width, x_per_gob); |
| 392 | const u32 block_size = gob_size * gobs_in_block; | 412 | const u32 block_size = GOB_SIZE * gobs_in_block; |
| 393 | const u32 stride = block_size * x_blocks; | 413 | const u32 stride = block_size * x_blocks; |
| 394 | const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size; | 414 | const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size; |
| 395 | const u32 relative_y = dst_y % y_blocks; | 415 | const u32 relative_y = dst_y % y_blocks; |
| 396 | return base + (relative_y / gob_size_y) * gob_size; | 416 | return base + (relative_y / GOB_SIZE_Y) * GOB_SIZE; |
| 397 | } | 417 | } |
| 398 | 418 | ||
| 399 | } // namespace Tegra::Texture | 419 | } // namespace Tegra::Texture |