diff options
| author | 2018-09-21 12:09:59 -0400 | |
|---|---|---|
| committer | 2018-09-21 12:09:59 -0400 | |
| commit | 57b44200a2bf37cc4775ea88b5ff8ed9f67bc145 (patch) | |
| tree | 83558e511059595c62ca4418ac34829f28f2bc29 /src | |
| parent | Join both Swizzle methods within one interface function (diff) | |
| download | yuzu-57b44200a2bf37cc4775ea88b5ff8ed9f67bc145.tar.gz yuzu-57b44200a2bf37cc4775ea88b5ff8ed9f67bc145.tar.xz yuzu-57b44200a2bf37cc4775ea88b5ff8ed9f67bc145.zip | |
Restore the stride alignment restriction on FastSwizzle due to lost performance
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 5 |
1 file changed, 2 insertions, 3 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5a2a0b84d..3d5476e5d 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
| @@ -85,7 +85,6 @@ static void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_ | |||
| 85 | const std::size_t pixel_base{y * width * out_bytes_per_pixel}; | 85 | const std::size_t pixel_base{y * width * out_bytes_per_pixel}; |
| 86 | const auto& table = fast_swizzle_table[y % gobs_in_y]; | 86 | const auto& table = fast_swizzle_table[y % gobs_in_y]; |
| 87 | for (std::size_t xb = 0; xb < stride; xb += copy_size) { | 87 | for (std::size_t xb = 0; xb < stride; xb += copy_size) { |
| 88 | const std::size_t truncated_copy = std::min(copy_size, stride - xb); | ||
| 89 | const std::size_t gob_address{initial_gob + | 88 | const std::size_t gob_address{initial_gob + |
| 90 | (xb / gobs_in_x) * gobs_size * block_height}; | 89 | (xb / gobs_in_x) * gobs_size * block_height}; |
| 91 | const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]}; | 90 | const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]}; |
| @@ -93,14 +92,14 @@ static void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_ | |||
| 93 | const std::size_t pixel_index{out_x + pixel_base}; | 92 | const std::size_t pixel_index{out_x + pixel_base}; |
| 94 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | 93 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; |
| 95 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | 94 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; |
| 96 | std::memcpy(data_ptrs[0], data_ptrs[1], truncated_copy); | 95 | std::memcpy(data_ptrs[0], data_ptrs[1], copy_size); |
| 97 | } | 96 | } |
| 98 | } | 97 | } |
| 99 | } | 98 | } |
| 100 | 99 | ||
| 101 | void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, | 100 | void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, |
| 102 | u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height) { | 101 | u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height) { |
| 103 | if (bytes_per_pixel % 3 != 0) { | 102 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) { |
| 104 | FastSwizzleData(width, height, bytes_per_pixel, out_bytes_per_pixel, swizzled_data, | 103 | FastSwizzleData(width, height, bytes_per_pixel, out_bytes_per_pixel, swizzled_data, |
| 105 | unswizzled_data, unswizzle, block_height); | 104 | unswizzled_data, unswizzle, block_height); |
| 106 | } else { | 105 | } else { |