diff options
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 30 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_params.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_params.h | 2 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 134 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 38 |
5 files changed, 125 insertions, 84 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 28faad9ff..a2d3d7823 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -37,7 +37,8 @@ void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | |||
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | void MaxwellDMA::Launch() { | 39 | void MaxwellDMA::Launch() { |
| 40 | LOG_TRACE(HW_GPU, "Requested a DMA copy"); | 40 | LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in), |
| 41 | static_cast<GPUVAddr>(regs.offset_out)); | ||
| 41 | 42 | ||
| 42 | // TODO(Subv): Perform more research and implement all features of this engine. | 43 | // TODO(Subv): Perform more research and implement all features of this engine. |
| 43 | const LaunchDMA& launch = regs.launch_dma; | 44 | const LaunchDMA& launch = regs.launch_dma; |
| @@ -97,7 +98,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 97 | 98 | ||
| 98 | // Optimized path for micro copies. | 99 | // Optimized path for micro copies. |
| 99 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | 100 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; |
| 100 | if (dst_size < GetGOBSize() && regs.pitch_out <= 64) { | 101 | if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) { |
| 101 | FastCopyBlockLinearToPitch(); | 102 | FastCopyBlockLinearToPitch(); |
| 102 | return; | 103 | return; |
| 103 | } | 104 | } |
| @@ -130,18 +131,15 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 130 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | 131 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); |
| 131 | } | 132 | } |
| 132 | 133 | ||
| 133 | UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, src_params.width, | 134 | UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel, |
| 134 | bytes_per_pixel, read_buffer.data() + src_layer_size * src_params.layer, | 135 | read_buffer.data() + src_layer_size * src_params.layer, write_buffer.data(), |
| 135 | write_buffer.data(), src_params.block_size.height, src_params.origin.x, | 136 | block_height, src_params.origin.x, src_params.origin.y); |
| 136 | src_params.origin.y); | ||
| 137 | 137 | ||
| 138 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 138 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 139 | } | 139 | } |
| 140 | 140 | ||
| 141 | void MaxwellDMA::CopyPitchToBlockLinear() { | 141 | void MaxwellDMA::CopyPitchToBlockLinear() { |
| 142 | const auto& dst_params = regs.dst_params; | 142 | const auto& dst_params = regs.dst_params; |
| 143 | ASSERT(dst_params.block_size.depth == 0); | ||
| 144 | |||
| 145 | const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; | 143 | const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; |
| 146 | const u32 width = dst_params.width; | 144 | const u32 width = dst_params.width; |
| 147 | const u32 height = dst_params.height; | 145 | const u32 height = dst_params.height; |
| @@ -171,17 +169,23 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 171 | } | 169 | } |
| 172 | 170 | ||
| 173 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 171 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 174 | SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, dst_params.width, | 172 | if (regs.dst_params.block_size.depth > 0) { |
| 175 | bytes_per_pixel, write_buffer.data() + dst_layer_size * dst_params.layer, | 173 | ASSERT(dst_params.layer == 0); |
| 176 | read_buffer.data(), dst_params.block_size.height, dst_params.origin.x, | 174 | SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height, |
| 177 | dst_params.origin.y); | 175 | bytes_per_pixel, block_height, block_depth, dst_params.origin.x, |
| 176 | dst_params.origin.y, write_buffer.data(), read_buffer.data()); | ||
| 177 | } else { | ||
| 178 | SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel, | ||
| 179 | write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(), | ||
| 180 | block_height, dst_params.origin.x, dst_params.origin.y); | ||
| 181 | } | ||
| 178 | 182 | ||
| 179 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 183 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 180 | } | 184 | } |
| 181 | 185 | ||
| 182 | void MaxwellDMA::FastCopyBlockLinearToPitch() { | 186 | void MaxwellDMA::FastCopyBlockLinearToPitch() { |
| 183 | const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; | 187 | const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; |
| 184 | const size_t src_size = GetGOBSize(); | 188 | const size_t src_size = GOB_SIZE; |
| 185 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | 189 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; |
| 186 | u32 pos_x = regs.src_params.origin.x; | 190 | u32 pos_x = regs.src_params.origin.x; |
| 187 | u32 pos_y = regs.src_params.origin.y; | 191 | u32 pos_y = regs.src_params.origin.y; |
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 0b2b2b8c4..921562c1f 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -343,8 +343,7 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co | |||
| 343 | size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); | 343 | size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); |
| 344 | } | 344 | } |
| 345 | if (is_tiled && is_layered) { | 345 | if (is_tiled && is_layered) { |
| 346 | return Common::AlignBits(size, | 346 | return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); |
| 347 | Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); | ||
| 348 | } | 347 | } |
| 349 | return size; | 348 | return size; |
| 350 | } | 349 | } |
| @@ -418,7 +417,7 @@ std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { | |||
| 418 | const u32 block_size = GetBlockSize(); | 417 | const u32 block_size = GetBlockSize(); |
| 419 | const u32 block_index = offset / block_size; | 418 | const u32 block_index = offset / block_size; |
| 420 | const u32 gob_offset = offset % block_size; | 419 | const u32 gob_offset = offset % block_size; |
| 421 | const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GetGOBSize()); | 420 | const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE); |
| 422 | const u32 x_gob_pixels = 64U / GetBytesPerPixel(); | 421 | const u32 x_gob_pixels = 64U / GetBytesPerPixel(); |
| 423 | const u32 x_block_pixels = x_gob_pixels << block_width; | 422 | const u32 x_block_pixels = x_gob_pixels << block_width; |
| 424 | const u32 y_block_pixels = 8U << block_height; | 423 | const u32 y_block_pixels = 8U << block_height; |
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 24957df8d..118aa689e 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -204,7 +204,7 @@ public: | |||
| 204 | static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, | 204 | static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, |
| 205 | const u32 block_depth) { | 205 | const u32 block_depth) { |
| 206 | return Common::AlignBits(out_size, | 206 | return Common::AlignBits(out_size, |
| 207 | Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); | 207 | Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); |
| 208 | } | 208 | } |
| 209 | 209 | ||
| 210 | /// Converts a width from a type of surface into another. This helps represent the | 210 | /// Converts a width from a type of surface into another. This helps represent the |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 548e4c3fe..98beabef1 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include "common/alignment.h" | 7 | #include "common/alignment.h" |
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/bit_util.h" | ||
| 9 | #include "video_core/gpu.h" | 10 | #include "video_core/gpu.h" |
| 10 | #include "video_core/textures/decoders.h" | 11 | #include "video_core/textures/decoders.h" |
| 11 | #include "video_core/textures/texture.h" | 12 | #include "video_core/textures/texture.h" |
| @@ -37,20 +38,10 @@ struct alignas(64) SwizzleTable { | |||
| 37 | std::array<std::array<u16, M>, N> values{}; | 38 | std::array<std::array<u16, M>, N> values{}; |
| 38 | }; | 39 | }; |
| 39 | 40 | ||
| 40 | constexpr u32 gob_size_x_shift = 6; | 41 | constexpr u32 FAST_SWIZZLE_ALIGN = 16; |
| 41 | constexpr u32 gob_size_y_shift = 3; | ||
| 42 | constexpr u32 gob_size_z_shift = 0; | ||
| 43 | constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; | ||
| 44 | 42 | ||
| 45 | constexpr u32 gob_size_x = 1U << gob_size_x_shift; | 43 | constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); |
| 46 | constexpr u32 gob_size_y = 1U << gob_size_y_shift; | 44 | constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); |
| 47 | constexpr u32 gob_size_z = 1U << gob_size_z_shift; | ||
| 48 | constexpr u32 gob_size = 1U << gob_size_shift; | ||
| 49 | |||
| 50 | constexpr u32 fast_swizzle_align = 16; | ||
| 51 | |||
| 52 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); | ||
| 53 | constexpr auto fast_swizzle_table = SwizzleTable<gob_size_y, 4, fast_swizzle_align>(); | ||
| 54 | 45 | ||
| 55 | /** | 46 | /** |
| 56 | * This function manages ALL the GOBs(Group of Bytes) Inside a single block. | 47 | * This function manages ALL the GOBs(Group of Bytes) Inside a single block. |
| @@ -69,17 +60,17 @@ void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, con | |||
| 69 | u32 y_address = z_address; | 60 | u32 y_address = z_address; |
| 70 | u32 pixel_base = layer_z * z + y_start * stride_x; | 61 | u32 pixel_base = layer_z * z + y_start * stride_x; |
| 71 | for (u32 y = y_start; y < y_end; y++) { | 62 | for (u32 y = y_start; y < y_end; y++) { |
| 72 | const auto& table = legacy_swizzle_table[y % gob_size_y]; | 63 | const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 73 | for (u32 x = x_start; x < x_end; x++) { | 64 | for (u32 x = x_start; x < x_end; x++) { |
| 74 | const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % gob_size_x]}; | 65 | const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]}; |
| 75 | const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; | 66 | const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; |
| 76 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | 67 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; |
| 77 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | 68 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; |
| 78 | std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | 69 | std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); |
| 79 | } | 70 | } |
| 80 | pixel_base += stride_x; | 71 | pixel_base += stride_x; |
| 81 | if ((y + 1) % gob_size_y == 0) | 72 | if ((y + 1) % GOB_SIZE_Y == 0) |
| 82 | y_address += gob_size; | 73 | y_address += GOB_SIZE; |
| 83 | } | 74 | } |
| 84 | z_address += xy_block_size; | 75 | z_address += xy_block_size; |
| 85 | } | 76 | } |
| @@ -104,18 +95,18 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const | |||
| 104 | u32 y_address = z_address; | 95 | u32 y_address = z_address; |
| 105 | u32 pixel_base = layer_z * z + y_start * stride_x; | 96 | u32 pixel_base = layer_z * z + y_start * stride_x; |
| 106 | for (u32 y = y_start; y < y_end; y++) { | 97 | for (u32 y = y_start; y < y_end; y++) { |
| 107 | const auto& table = fast_swizzle_table[y % gob_size_y]; | 98 | const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 108 | for (u32 xb = x_startb; xb < x_endb; xb += fast_swizzle_align) { | 99 | for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) { |
| 109 | const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; | 100 | const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]}; |
| 110 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; | 101 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; |
| 111 | const u32 pixel_index{out_x + pixel_base}; | 102 | const u32 pixel_index{out_x + pixel_base}; |
| 112 | data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; | 103 | data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; |
| 113 | data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; | 104 | data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; |
| 114 | std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); | 105 | std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN); |
| 115 | } | 106 | } |
| 116 | pixel_base += stride_x; | 107 | pixel_base += stride_x; |
| 117 | if ((y + 1) % gob_size_y == 0) | 108 | if ((y + 1) % GOB_SIZE_Y == 0) |
| 118 | y_address += gob_size; | 109 | y_address += GOB_SIZE; |
| 119 | } | 110 | } |
| 120 | z_address += xy_block_size; | 111 | z_address += xy_block_size; |
| 121 | } | 112 | } |
| @@ -138,9 +129,9 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 138 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | 129 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; |
| 139 | const u32 stride_x = width * out_bytes_per_pixel; | 130 | const u32 stride_x = width * out_bytes_per_pixel; |
| 140 | const u32 layer_z = height * stride_x; | 131 | const u32 layer_z = height * stride_x; |
| 141 | const u32 gob_elements_x = gob_size_x / bytes_per_pixel; | 132 | const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; |
| 142 | constexpr u32 gob_elements_y = gob_size_y; | 133 | constexpr u32 gob_elements_y = GOB_SIZE_Y; |
| 143 | constexpr u32 gob_elements_z = gob_size_z; | 134 | constexpr u32 gob_elements_z = GOB_SIZE_Z; |
| 144 | const u32 block_x_elements = gob_elements_x; | 135 | const u32 block_x_elements = gob_elements_x; |
| 145 | const u32 block_y_elements = gob_elements_y * block_height; | 136 | const u32 block_y_elements = gob_elements_y * block_height; |
| 146 | const u32 block_z_elements = gob_elements_z * block_depth; | 137 | const u32 block_z_elements = gob_elements_z * block_depth; |
| @@ -148,7 +139,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 148 | const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); | 139 | const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); |
| 149 | const u32 blocks_on_y = div_ceil(height, block_y_elements); | 140 | const u32 blocks_on_y = div_ceil(height, block_y_elements); |
| 150 | const u32 blocks_on_z = div_ceil(depth, block_z_elements); | 141 | const u32 blocks_on_z = div_ceil(depth, block_z_elements); |
| 151 | const u32 xy_block_size = gob_size * block_height; | 142 | const u32 xy_block_size = GOB_SIZE * block_height; |
| 152 | const u32 block_size = xy_block_size * block_depth; | 143 | const u32 block_size = xy_block_size * block_depth; |
| 153 | u32 tile_offset = 0; | 144 | u32 tile_offset = 0; |
| 154 | for (u32 zb = 0; zb < blocks_on_z; zb++) { | 145 | for (u32 zb = 0; zb < blocks_on_z; zb++) { |
| @@ -182,7 +173,7 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | |||
| 182 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { | 173 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { |
| 183 | const u32 block_height_size{1U << block_height}; | 174 | const u32 block_height_size{1U << block_height}; |
| 184 | const u32 block_depth_size{1U << block_depth}; | 175 | const u32 block_depth_size{1U << block_depth}; |
| 185 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { | 176 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) { |
| 186 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 177 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 187 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, | 178 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 188 | block_depth_size, width_spacing); | 179 | block_depth_size, width_spacing); |
| @@ -259,25 +250,26 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, | |||
| 259 | } | 250 | } |
| 260 | 251 | ||
| 261 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 252 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 262 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | 253 | u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, |
| 263 | u32 block_height_bit, u32 offset_x, u32 offset_y) { | 254 | u32 block_height_bit, u32 offset_x, u32 offset_y) { |
| 264 | const u32 block_height = 1U << block_height_bit; | 255 | const u32 block_height = 1U << block_height_bit; |
| 265 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | 256 | const u32 image_width_in_gobs = |
| 266 | gob_size_x}; | 257 | (swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X; |
| 267 | for (u32 line = 0; line < subrect_height; ++line) { | 258 | for (u32 line = 0; line < subrect_height; ++line) { |
| 268 | const u32 dst_y = line + offset_y; | 259 | const u32 dst_y = line + offset_y; |
| 269 | const u32 gob_address_y = | 260 | const u32 gob_address_y = |
| 270 | (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | 261 | (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + |
| 271 | ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 262 | ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 272 | const auto& table = legacy_swizzle_table[dst_y % gob_size_y]; | 263 | const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; |
| 273 | for (u32 x = 0; x < subrect_width; ++x) { | 264 | for (u32 x = 0; x < subrect_width; ++x) { |
| 274 | const u32 dst_x = x + offset_x; | 265 | const u32 dst_x = x + offset_x; |
| 275 | const u32 gob_address = | 266 | const u32 gob_address = |
| 276 | gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height; | 267 | gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height; |
| 277 | const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x]; | 268 | const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X]; |
| 278 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; | 269 | const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel; |
| 279 | u8* dest_addr = swizzled_data + swizzled_offset; | ||
| 280 | 270 | ||
| 271 | const u8* const source_line = unswizzled_data + unswizzled_offset; | ||
| 272 | u8* const dest_addr = swizzled_data + swizzled_offset; | ||
| 281 | std::memcpy(dest_addr, source_line, bytes_per_pixel); | 273 | std::memcpy(dest_addr, source_line, bytes_per_pixel); |
| 282 | } | 274 | } |
| 283 | } | 275 | } |
| @@ -289,14 +281,15 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 289 | const u32 block_height = 1U << block_height_bit; | 281 | const u32 block_height = 1U << block_height_bit; |
| 290 | for (u32 line = 0; line < subrect_height; ++line) { | 282 | for (u32 line = 0; line < subrect_height; ++line) { |
| 291 | const u32 y2 = line + offset_y; | 283 | const u32 y2 = line + offset_y; |
| 292 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + | 284 | const u32 gob_address_y = (y2 / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height + |
| 293 | ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 285 | ((y2 % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 294 | const auto& table = legacy_swizzle_table[y2 % gob_size_y]; | 286 | const auto& table = LEGACY_SWIZZLE_TABLE[y2 % GOB_SIZE_Y]; |
| 295 | for (u32 x = 0; x < subrect_width; ++x) { | 287 | for (u32 x = 0; x < subrect_width; ++x) { |
| 296 | const u32 x2 = (x + offset_x) * bytes_per_pixel; | 288 | const u32 x2 = (x + offset_x) * bytes_per_pixel; |
| 297 | const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; | 289 | const u32 gob_address = gob_address_y + (x2 / GOB_SIZE_X) * GOB_SIZE * block_height; |
| 298 | const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; | 290 | const u32 swizzled_offset = gob_address + table[x2 % GOB_SIZE_X]; |
| 299 | u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; | 291 | const u32 unswizzled_offset = line * dest_pitch + x * bytes_per_pixel; |
| 292 | u8* dest_line = unswizzled_data + unswizzled_offset; | ||
| 300 | u8* source_addr = swizzled_data + swizzled_offset; | 293 | u8* source_addr = swizzled_data + swizzled_offset; |
| 301 | 294 | ||
| 302 | std::memcpy(dest_line, source_addr, bytes_per_pixel); | 295 | std::memcpy(dest_line, source_addr, bytes_per_pixel); |
| @@ -304,21 +297,48 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 304 | } | 297 | } |
| 305 | } | 298 | } |
| 306 | 299 | ||
| 300 | void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, | ||
| 301 | u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, | ||
| 302 | u32 origin_y, u8* output, const u8* input) { | ||
| 303 | UNIMPLEMENTED_IF(origin_x > 0); | ||
| 304 | UNIMPLEMENTED_IF(origin_y > 0); | ||
| 305 | |||
| 306 | const u32 stride = width * bytes_per_pixel; | ||
| 307 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; | ||
| 308 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 309 | |||
| 310 | const u32 block_height_mask = (1U << block_height) - 1; | ||
| 311 | const u32 x_shift = Common::CountTrailingZeroes32(GOB_SIZE << (block_height + block_depth)); | ||
| 312 | |||
| 313 | for (u32 line = 0; line < line_count; ++line) { | ||
| 314 | const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; | ||
| 315 | const u32 block_y = line / GOB_SIZE_Y; | ||
| 316 | const u32 dst_offset_y = | ||
| 317 | (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; | ||
| 318 | for (u32 x = 0; x < line_length_in; ++x) { | ||
| 319 | const u32 dst_offset = | ||
| 320 | ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X]; | ||
| 321 | const u32 src_offset = x * bytes_per_pixel + line * pitch; | ||
| 322 | std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel); | ||
| 323 | } | ||
| 324 | } | ||
| 325 | } | ||
| 326 | |||
| 307 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | 327 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, |
| 308 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, | 328 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, |
| 309 | u8* swizzle_data) { | 329 | u8* swizzle_data) { |
| 310 | const u32 block_height = 1U << block_height_bit; | 330 | const u32 block_height = 1U << block_height_bit; |
| 311 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | 331 | const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X}; |
| 312 | std::size_t count = 0; | 332 | std::size_t count = 0; |
| 313 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | 333 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { |
| 314 | const std::size_t gob_address_y = | 334 | const std::size_t gob_address_y = |
| 315 | (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | 335 | (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + |
| 316 | ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 336 | ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 317 | const auto& table = legacy_swizzle_table[y % gob_size_y]; | 337 | const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 318 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { | 338 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { |
| 319 | const std::size_t gob_address = | 339 | const std::size_t gob_address = |
| 320 | gob_address_y + (x / gob_size_x) * gob_size * block_height; | 340 | gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; |
| 321 | const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; | 341 | const std::size_t swizzled_offset = gob_address + table[x % GOB_SIZE_X]; |
| 322 | const u8* source_line = source_data + count; | 342 | const u8* source_line = source_data + count; |
| 323 | u8* dest_addr = swizzle_data + swizzled_offset; | 343 | u8* dest_addr = swizzle_data + swizzled_offset; |
| 324 | count++; | 344 | count++; |
| @@ -373,9 +393,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat | |||
| 373 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 393 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 374 | u32 block_height, u32 block_depth) { | 394 | u32 block_height, u32 block_depth) { |
| 375 | if (tiled) { | 395 | if (tiled) { |
| 376 | const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); | 396 | const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, GOB_SIZE_X_SHIFT); |
| 377 | const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); | 397 | const u32 aligned_height = Common::AlignBits(height, GOB_SIZE_Y_SHIFT + block_height); |
| 378 | const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); | 398 | const u32 aligned_depth = Common::AlignBits(depth, GOB_SIZE_Z_SHIFT + block_depth); |
| 379 | return aligned_width * aligned_height * aligned_depth; | 399 | return aligned_width * aligned_height * aligned_depth; |
| 380 | } else { | 400 | } else { |
| 381 | return width * height * depth * bytes_per_pixel; | 401 | return width * height * depth * bytes_per_pixel; |
| @@ -386,14 +406,14 @@ u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | |||
| 386 | u32 bytes_per_pixel) { | 406 | u32 bytes_per_pixel) { |
| 387 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | 407 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; |
| 388 | const u32 gobs_in_block = 1 << block_height; | 408 | const u32 gobs_in_block = 1 << block_height; |
| 389 | const u32 y_blocks = gob_size_y << block_height; | 409 | const u32 y_blocks = GOB_SIZE_Y << block_height; |
| 390 | const u32 x_per_gob = gob_size_x / bytes_per_pixel; | 410 | const u32 x_per_gob = GOB_SIZE_X / bytes_per_pixel; |
| 391 | const u32 x_blocks = div_ceil(width, x_per_gob); | 411 | const u32 x_blocks = div_ceil(width, x_per_gob); |
| 392 | const u32 block_size = gob_size * gobs_in_block; | 412 | const u32 block_size = GOB_SIZE * gobs_in_block; |
| 393 | const u32 stride = block_size * x_blocks; | 413 | const u32 stride = block_size * x_blocks; |
| 394 | const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size; | 414 | const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size; |
| 395 | const u32 relative_y = dst_y % y_blocks; | 415 | const u32 relative_y = dst_y % y_blocks; |
| 396 | return base + (relative_y / gob_size_y) * gob_size; | 416 | return base + (relative_y / GOB_SIZE_Y) * GOB_SIZE; |
| 397 | } | 417 | } |
| 398 | 418 | ||
| 399 | } // namespace Tegra::Texture | 419 | } // namespace Tegra::Texture |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 06f3ebf87..232b696b3 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -10,15 +10,15 @@ | |||
| 10 | 10 | ||
| 11 | namespace Tegra::Texture { | 11 | namespace Tegra::Texture { |
| 12 | 12 | ||
| 13 | // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents | 13 | constexpr u32 GOB_SIZE_X = 64; |
| 14 | // an small rect of (64/bytes_per_pixel)X8. | 14 | constexpr u32 GOB_SIZE_Y = 8; |
| 15 | inline std::size_t GetGOBSize() { | 15 | constexpr u32 GOB_SIZE_Z = 1; |
| 16 | return 512; | 16 | constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; |
| 17 | } | ||
| 18 | 17 | ||
| 19 | inline std::size_t GetGOBSizeShift() { | 18 | constexpr std::size_t GOB_SIZE_X_SHIFT = 6; |
| 20 | return 9; | 19 | constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; |
| 21 | } | 20 | constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; |
| 21 | constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; | ||
| 22 | 22 | ||
| 23 | /// Unswizzles a swizzled texture without changing its format. | 23 | /// Unswizzles a swizzled texture without changing its format. |
| 24 | void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, | 24 | void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, |
| @@ -48,14 +48,32 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | |||
| 48 | 48 | ||
| 49 | /// Copies an untiled subrectangle into a tiled surface. | 49 | /// Copies an untiled subrectangle into a tiled surface. |
| 50 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 50 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 51 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 51 | u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, |
| 52 | u32 offset_x, u32 offset_y); | 52 | u32 block_height_bit, u32 offset_x, u32 offset_y); |
| 53 | 53 | ||
| 54 | /// Copies a tiled subrectangle into a linear surface. | 54 | /// Copies a tiled subrectangle into a linear surface. |
| 55 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 55 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 56 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 56 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 57 | u32 offset_x, u32 offset_y); | 57 | u32 offset_x, u32 offset_y); |
| 58 | 58 | ||
| 59 | /// @brief Swizzles a 2D array of pixels into a 3D texture | ||
| 60 | /// @param line_length_in Number of pixels per line | ||
| 61 | /// @param line_count Number of lines | ||
| 62 | /// @param pitch Number of bytes per line | ||
| 63 | /// @param width Width of the swizzled texture | ||
| 64 | /// @param height Height of the swizzled texture | ||
| 65 | /// @param bytes_per_pixel Number of bytes used per pixel | ||
| 66 | /// @param block_height Block height shift | ||
| 67 | /// @param block_depth Block depth shift | ||
| 68 | /// @param origin_x Column offset in pixels of the swizzled texture | ||
| 69 | /// @param origin_y Row offset in pixels of the swizzled texture | ||
| 70 | /// @param output Pointer to the pixels of the swizzled texture | ||
| 71 | /// @param input Pointer to the 2D array of pixels used as input | ||
| 72 | /// @pre input and output points to an array large enough to hold the number of bytes used | ||
| 73 | void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, | ||
| 74 | u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, | ||
| 75 | u32 origin_y, u8* output, const u8* input); | ||
| 76 | |||
| 59 | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | 77 | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, |
| 60 | std::size_t copy_size, const u8* source_data, u8* swizzle_data); | 78 | std::size_t copy_size, const u8* source_data, u8* swizzle_data); |
| 61 | 79 | ||