diff options
| author | 2019-05-10 04:17:48 -0300 | |
|---|---|---|
| committer | 2019-06-20 21:36:12 -0300 | |
| commit | 345e73f2feb0701e3c3099d002a1c21fb524eae4 (patch) | |
| tree | c8c934dfec804d04a29f8ee27124274f5f999fb8 /src | |
| parent | texture_cache: Change internal cache from lists to vectors (diff) | |
| download | yuzu-345e73f2feb0701e3c3099d002a1c21fb524eae4.tar.gz yuzu-345e73f2feb0701e3c3099d002a1c21fb524eae4.tar.xz yuzu-345e73f2feb0701e3c3099d002a1c21fb524eae4.zip | |
video_core: Use un-shifted block sizes to avoid integer divisions
Instead of storing all block widths, heights and depths in their shifted
form:
block_width = 1U << block_shift;
store them as they are provided by the emulated hardware (in their
block_shift form). This way we can avoid the costly
Common::AlignUp operation when aligning texture sizes and replace CPU integer
divisions with bitwise logic (defined in Common::AlignBits).
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/alignment.h | 5 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 9 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 4 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_base.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_params.cpp | 39 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_params.h | 7 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 3 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 55 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 4 | ||||
| -rw-r--r-- | src/video_core/textures/texture.h | 9 |
10 files changed, 78 insertions, 60 deletions
diff --git a/src/common/alignment.h b/src/common/alignment.h index d94a2291f..3379a6967 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h | |||
| @@ -20,6 +20,11 @@ constexpr T AlignDown(T value, std::size_t size) { | |||
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | template <typename T> | 22 | template <typename T> |
| 23 | constexpr T AlignBits(T value, T align) { | ||
| 24 | return (value + ((1 << align) - 1)) >> align << align; | ||
| 25 | } | ||
| 26 | |||
| 27 | template <typename T> | ||
| 23 | constexpr bool Is4KBAligned(T value) { | 28 | constexpr bool Is4KBAligned(T value) { |
| 24 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); | 29 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); |
| 25 | return (value & 0xFFF) == 0; | 30 | return (value & 0xFFF) == 0; |
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 45f59a4d9..3d28afa91 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -63,18 +63,15 @@ public: | |||
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | u32 BlockWidth() const { | 65 | u32 BlockWidth() const { |
| 66 | // The block width is stored in log2 format. | 66 | return block_width; |
| 67 | return 1 << block_width; | ||
| 68 | } | 67 | } |
| 69 | 68 | ||
| 70 | u32 BlockHeight() const { | 69 | u32 BlockHeight() const { |
| 71 | // The block height is stored in log2 format. | 70 | return block_height; |
| 72 | return 1 << block_height; | ||
| 73 | } | 71 | } |
| 74 | 72 | ||
| 75 | u32 BlockDepth() const { | 73 | u32 BlockDepth() const { |
| 76 | // The block depth is stored in log2 format. | 74 | return block_depth; |
| 77 | return 1 << block_depth; | ||
| 78 | } | 75 | } |
| 79 | }; | 76 | }; |
| 80 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); | 77 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index e5942f671..522fa97dc 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -59,11 +59,11 @@ public: | |||
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | u32 BlockHeight() const { | 61 | u32 BlockHeight() const { |
| 62 | return 1 << block_height; | 62 | return block_height; |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | u32 BlockDepth() const { | 65 | u32 BlockDepth() const { |
| 66 | return 1 << block_depth; | 66 | return block_depth; |
| 67 | } | 67 | } |
| 68 | }; | 68 | }; |
| 69 | 69 | ||
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 36ca72b4a..510d1aef5 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -22,7 +22,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) | |||
| 22 | : params{params}, mipmap_sizes(params.num_levels), | 22 | : params{params}, mipmap_sizes(params.num_levels), |
| 23 | mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ | 23 | mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ |
| 24 | params.GetHostSizeInBytes()} { | 24 | params.GetHostSizeInBytes()} { |
| 25 | |||
| 26 | std::size_t offset = 0; | 25 | std::size_t offset = 0; |
| 27 | for (u32 level = 0; level < params.num_levels; ++level) { | 26 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 28 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; | 27 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; |
| @@ -75,7 +74,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, | |||
| 75 | return; | 74 | return; |
| 76 | } | 75 | } |
| 77 | if (params.is_tiled) { | 76 | if (params.is_tiled) { |
| 78 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", | 77 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", |
| 79 | params.block_width, static_cast<u32>(params.target)); | 78 | params.block_width, static_cast<u32>(params.target)); |
| 80 | for (u32 level = 0; level < params.num_levels; ++level) { | 79 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 81 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; | 80 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; |
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index b537b26e2..3a47f404d 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -96,9 +96,9 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( | |||
| 96 | SurfaceParams params; | 96 | SurfaceParams params; |
| 97 | params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | 97 | params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; |
| 98 | params.srgb_conversion = false; | 98 | params.srgb_conversion = false; |
| 99 | params.block_width = 1 << std::min(block_width, 5U); | 99 | params.block_width = std::min(block_width, 5U); |
| 100 | params.block_height = 1 << std::min(block_height, 5U); | 100 | params.block_height = std::min(block_height, 5U); |
| 101 | params.block_depth = 1 << std::min(block_depth, 5U); | 101 | params.block_depth = std::min(block_depth, 5U); |
| 102 | params.tile_width_spacing = 1; | 102 | params.tile_width_spacing = 1; |
| 103 | params.pixel_format = PixelFormatFromDepthFormat(format); | 103 | params.pixel_format = PixelFormatFromDepthFormat(format); |
| 104 | params.component_type = ComponentTypeFromDepthFormat(format); | 104 | params.component_type = ComponentTypeFromDepthFormat(format); |
| @@ -120,9 +120,9 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz | |||
| 120 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | 120 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; |
| 121 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || | 121 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || |
| 122 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; | 122 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; |
| 123 | params.block_width = 1 << config.memory_layout.block_width; | 123 | params.block_width = config.memory_layout.block_width; |
| 124 | params.block_height = 1 << config.memory_layout.block_height; | 124 | params.block_height = config.memory_layout.block_height; |
| 125 | params.block_depth = 1 << config.memory_layout.block_depth; | 125 | params.block_depth = config.memory_layout.block_depth; |
| 126 | params.tile_width_spacing = 1; | 126 | params.tile_width_spacing = 1; |
| 127 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | 127 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); |
| 128 | params.component_type = ComponentTypeFromRenderTarget(config.format); | 128 | params.component_type = ComponentTypeFromRenderTarget(config.format); |
| @@ -149,9 +149,9 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( | |||
| 149 | params.is_tiled = !config.linear; | 149 | params.is_tiled = !config.linear; |
| 150 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || | 150 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || |
| 151 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; | 151 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; |
| 152 | params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, | 152 | params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0, |
| 153 | params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, | 153 | params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0, |
| 154 | params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, | 154 | params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0, |
| 155 | params.tile_width_spacing = 1; | 155 | params.tile_width_spacing = 1; |
| 156 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | 156 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); |
| 157 | params.component_type = ComponentTypeFromRenderTarget(config.format); | 157 | params.component_type = ComponentTypeFromRenderTarget(config.format); |
| @@ -190,9 +190,9 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { | |||
| 190 | const u32 height{GetMipHeight(level)}; | 190 | const u32 height{GetMipHeight(level)}; |
| 191 | const u32 default_block_height{GetDefaultBlockHeight()}; | 191 | const u32 default_block_height{GetDefaultBlockHeight()}; |
| 192 | const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; | 192 | const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; |
| 193 | u32 block_height = 16; | 193 | u32 block_height = 4; |
| 194 | while (block_height > 1 && blocks_in_y <= block_height * 4) { | 194 | while (block_height > 0 && blocks_in_y <= (1U << block_height) * 4) { |
| 195 | block_height >>= 1; | 195 | --block_height; |
| 196 | } | 196 | } |
| 197 | return block_height; | 197 | return block_height; |
| 198 | } | 198 | } |
| @@ -202,17 +202,17 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { | |||
| 202 | return this->block_depth; | 202 | return this->block_depth; |
| 203 | } | 203 | } |
| 204 | if (is_layered) { | 204 | if (is_layered) { |
| 205 | return 1; | 205 | return 0; |
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | const u32 depth{GetMipDepth(level)}; | 208 | const u32 depth{GetMipDepth(level)}; |
| 209 | u32 block_depth = 32; | 209 | u32 block_depth = 5; |
| 210 | while (block_depth > 1 && depth * 2 <= block_depth) { | 210 | while (block_depth > 0 && depth * 2 <= (1U << block_depth)) { |
| 211 | block_depth >>= 1; | 211 | --block_depth; |
| 212 | } | 212 | } |
| 213 | 213 | ||
| 214 | if (block_depth == 32 && GetMipBlockHeight(level) >= 4) { | 214 | if (block_depth == 5 && GetMipBlockHeight(level) >= 2) { |
| 215 | return 16; | 215 | return 4; |
| 216 | } | 216 | } |
| 217 | 217 | ||
| 218 | return block_depth; | 218 | return block_depth; |
| @@ -252,7 +252,8 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co | |||
| 252 | size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); | 252 | size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); |
| 253 | } | 253 | } |
| 254 | if (is_tiled && is_layered) { | 254 | if (is_tiled && is_layered) { |
| 255 | return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); | 255 | return Common::AlignBits(size, |
| 256 | Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); | ||
| 256 | } | 257 | } |
| 257 | return size; | 258 | return size; |
| 258 | } | 259 | } |
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index e0ec1be0e..7c48782c7 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -54,12 +54,12 @@ public: | |||
| 54 | constexpr std::size_t rgb8_bpp = 4ULL; | 54 | constexpr std::size_t rgb8_bpp = 4ULL; |
| 55 | // ASTC is uncompressed in software, in emulated as RGBA8 | 55 | // ASTC is uncompressed in software, in emulated as RGBA8 |
| 56 | host_size_in_bytes = 0; | 56 | host_size_in_bytes = 0; |
| 57 | for (std::size_t level = 0; level < num_levels; level++) { | 57 | for (u32 level = 0; level < num_levels; ++level) { |
| 58 | const std::size_t width = | 58 | const std::size_t width = |
| 59 | Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); | 59 | Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); |
| 60 | const std::size_t height = | 60 | const std::size_t height = |
| 61 | Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); | 61 | Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); |
| 62 | const std::size_t depth = is_layered ? depth : GetMipDepth(level); | 62 | const std::size_t depth = is_layered ? this->depth : GetMipDepth(level); |
| 63 | host_size_in_bytes += width * height * depth * rgb8_bpp; | 63 | host_size_in_bytes += width * height * depth * rgb8_bpp; |
| 64 | } | 64 | } |
| 65 | } else { | 65 | } else { |
| @@ -96,7 +96,8 @@ public: | |||
| 96 | // Helper used for out of class size calculations | 96 | // Helper used for out of class size calculations |
| 97 | static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, | 97 | static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, |
| 98 | const u32 block_depth) { | 98 | const u32 block_depth) { |
| 99 | return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth); | 99 | return Common::AlignBits(out_size, |
| 100 | Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); | ||
| 100 | } | 101 | } |
| 101 | 102 | ||
| 102 | /// Returns the offset in bytes in guest memory of a given mipmap level. | 103 | /// Returns the offset in bytes in guest memory of a given mipmap level. |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1c2b63dae..f35d0c88f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -81,6 +81,9 @@ public: | |||
| 81 | if (!gpu_addr) { | 81 | if (!gpu_addr) { |
| 82 | return {}; | 82 | return {}; |
| 83 | } | 83 | } |
| 84 | if (gpu_addr == 0x1b7ec0000) { | ||
| 85 | // __debugbreak(); | ||
| 86 | } | ||
| 84 | const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; | 87 | const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; |
| 85 | return GetSurface(gpu_addr, params, true).second; | 88 | return GetSurface(gpu_addr, params, true).second; |
| 86 | } | 89 | } |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 217805386..f45fd175a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -36,10 +36,16 @@ struct alignas(64) SwizzleTable { | |||
| 36 | std::array<std::array<u16, M>, N> values{}; | 36 | std::array<std::array<u16, M>, N> values{}; |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | constexpr u32 gob_size_x = 64; | 39 | constexpr u32 gob_size_x_shift = 6; |
| 40 | constexpr u32 gob_size_y = 8; | 40 | constexpr u32 gob_size_y_shift = 3; |
| 41 | constexpr u32 gob_size_z = 1; | 41 | constexpr u32 gob_size_z_shift = 0; |
| 42 | constexpr u32 gob_size = gob_size_x * gob_size_y * gob_size_z; | 42 | constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; |
| 43 | |||
| 44 | constexpr u32 gob_size_x = 1U << gob_size_x_shift; | ||
| 45 | constexpr u32 gob_size_y = 1U << gob_size_y_shift; | ||
| 46 | constexpr u32 gob_size_z = 1U << gob_size_z_shift; | ||
| 47 | constexpr u32 gob_size = 1U << gob_size_shift; | ||
| 48 | |||
| 43 | constexpr u32 fast_swizzle_align = 16; | 49 | constexpr u32 fast_swizzle_align = 16; |
| 44 | 50 | ||
| 45 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); | 51 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); |
| @@ -171,14 +177,16 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 171 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | 177 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, |
| 172 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, | 178 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, |
| 173 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { | 179 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { |
| 180 | const u32 block_height_size{1U << block_height}; | ||
| 181 | const u32 block_depth_size{1U << block_depth}; | ||
| 174 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { | 182 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { |
| 175 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 183 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 176 | bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, | 184 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 177 | width_spacing); | 185 | block_depth_size, width_spacing); |
| 178 | } else { | 186 | } else { |
| 179 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 187 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 180 | bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, | 188 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 181 | width_spacing); | 189 | block_depth_size, width_spacing); |
| 182 | } | 190 | } |
| 183 | } | 191 | } |
| 184 | 192 | ||
| @@ -249,16 +257,18 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, | |||
| 249 | 257 | ||
| 250 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 258 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 251 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { | 259 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { |
| 260 | const u32 block_height_size{1U << block_height}; | ||
| 252 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | 261 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / |
| 253 | gob_size_x}; | 262 | gob_size_x}; |
| 254 | for (u32 line = 0; line < subrect_height; ++line) { | 263 | for (u32 line = 0; line < subrect_height; ++line) { |
| 255 | const u32 gob_address_y = | 264 | const u32 gob_address_y = |
| 256 | (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | 265 | (line / (gob_size_y * block_height_size)) * gob_size * block_height_size * |
| 257 | ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 266 | image_width_in_gobs + |
| 267 | ((line % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; | ||
| 258 | const auto& table = legacy_swizzle_table[line % gob_size_y]; | 268 | const auto& table = legacy_swizzle_table[line % gob_size_y]; |
| 259 | for (u32 x = 0; x < subrect_width; ++x) { | 269 | for (u32 x = 0; x < subrect_width; ++x) { |
| 260 | const u32 gob_address = | 270 | const u32 gob_address = |
| 261 | gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; | 271 | gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height_size; |
| 262 | const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; | 272 | const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; |
| 263 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; | 273 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; |
| 264 | u8* dest_addr = swizzled_data + swizzled_offset; | 274 | u8* dest_addr = swizzled_data + swizzled_offset; |
| @@ -271,14 +281,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 271 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 281 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 272 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 282 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 273 | u32 offset_x, u32 offset_y) { | 283 | u32 offset_x, u32 offset_y) { |
| 284 | const u32 block_height_size{1U << block_height}; | ||
| 274 | for (u32 line = 0; line < subrect_height; ++line) { | 285 | for (u32 line = 0; line < subrect_height; ++line) { |
| 275 | const u32 y2 = line + offset_y; | 286 | const u32 y2 = line + offset_y; |
| 276 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + | 287 | const u32 gob_address_y = |
| 277 | ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 288 | (y2 / (gob_size_y * block_height_size)) * gob_size * block_height_size + |
| 289 | ((y2 % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; | ||
| 278 | const auto& table = legacy_swizzle_table[y2 % gob_size_y]; | 290 | const auto& table = legacy_swizzle_table[y2 % gob_size_y]; |
| 279 | for (u32 x = 0; x < subrect_width; ++x) { | 291 | for (u32 x = 0; x < subrect_width; ++x) { |
| 280 | const u32 x2 = (x + offset_x) * bytes_per_pixel; | 292 | const u32 x2 = (x + offset_x) * bytes_per_pixel; |
| 281 | const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; | 293 | const u32 gob_address = |
| 294 | gob_address_y + (x2 / gob_size_x) * gob_size * block_height_size; | ||
| 282 | const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; | 295 | const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; |
| 283 | u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; | 296 | u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; |
| 284 | u8* source_addr = swizzled_data + swizzled_offset; | 297 | u8* source_addr = swizzled_data + swizzled_offset; |
| @@ -291,16 +304,18 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 291 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | 304 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, |
| 292 | const u32 block_height, const std::size_t copy_size, const u8* source_data, | 305 | const u32 block_height, const std::size_t copy_size, const u8* source_data, |
| 293 | u8* swizzle_data) { | 306 | u8* swizzle_data) { |
| 307 | const u32 block_height_size{1U << block_height}; | ||
| 294 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | 308 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; |
| 295 | std::size_t count = 0; | 309 | std::size_t count = 0; |
| 296 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | 310 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { |
| 297 | const std::size_t gob_address_y = | 311 | const std::size_t gob_address_y = |
| 298 | (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | 312 | (y / (gob_size_y * block_height_size)) * gob_size * block_height_size * |
| 299 | ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 313 | image_width_in_gobs + |
| 314 | ((y % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; | ||
| 300 | const auto& table = legacy_swizzle_table[y % gob_size_y]; | 315 | const auto& table = legacy_swizzle_table[y % gob_size_y]; |
| 301 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { | 316 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { |
| 302 | const std::size_t gob_address = | 317 | const std::size_t gob_address = |
| 303 | gob_address_y + (x / gob_size_x) * gob_size * block_height; | 318 | gob_address_y + (x / gob_size_x) * gob_size * block_height_size; |
| 304 | const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; | 319 | const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; |
| 305 | const u8* source_line = source_data + count; | 320 | const u8* source_line = source_data + count; |
| 306 | u8* dest_addr = swizzle_data + swizzled_offset; | 321 | u8* dest_addr = swizzle_data + swizzled_offset; |
| @@ -356,9 +371,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat | |||
| 356 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 371 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 357 | u32 block_height, u32 block_depth) { | 372 | u32 block_height, u32 block_depth) { |
| 358 | if (tiled) { | 373 | if (tiled) { |
| 359 | const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x); | 374 | const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); |
| 360 | const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height); | 375 | const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); |
| 361 | const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth); | 376 | const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); |
| 362 | return aligned_width * aligned_height * aligned_depth; | 377 | return aligned_width * aligned_height * aligned_depth; |
| 363 | } else { | 378 | } else { |
| 364 | return width * height * depth * bytes_per_pixel; | 379 | return width * height * depth * bytes_per_pixel; |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e072d8401..eaec9b5a5 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -12,8 +12,8 @@ namespace Tegra::Texture { | |||
| 12 | 12 | ||
| 13 | // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents | 13 | // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents |
| 14 | // an small rect of (64/bytes_per_pixel)X8. | 14 | // an small rect of (64/bytes_per_pixel)X8. |
| 15 | inline std::size_t GetGOBSize() { | 15 | inline std::size_t GetGOBSizeShift() { |
| 16 | return 512; | 16 | return 9; |
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | /// Unswizzles a swizzled texture without changing its format. | 19 | /// Unswizzles a swizzled texture without changing its format. |
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 219bfd559..f22b4e7c7 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -219,20 +219,17 @@ struct TICEntry { | |||
| 219 | 219 | ||
| 220 | u32 BlockWidth() const { | 220 | u32 BlockWidth() const { |
| 221 | ASSERT(IsTiled()); | 221 | ASSERT(IsTiled()); |
| 222 | // The block height is stored in log2 format. | 222 | return block_width; |
| 223 | return 1 << block_width; | ||
| 224 | } | 223 | } |
| 225 | 224 | ||
| 226 | u32 BlockHeight() const { | 225 | u32 BlockHeight() const { |
| 227 | ASSERT(IsTiled()); | 226 | ASSERT(IsTiled()); |
| 228 | // The block height is stored in log2 format. | 227 | return block_height; |
| 229 | return 1 << block_height; | ||
| 230 | } | 228 | } |
| 231 | 229 | ||
| 232 | u32 BlockDepth() const { | 230 | u32 BlockDepth() const { |
| 233 | ASSERT(IsTiled()); | 231 | ASSERT(IsTiled()); |
| 234 | // The block height is stored in log2 format. | 232 | return block_depth; |
| 235 | return 1 << block_depth; | ||
| 236 | } | 233 | } |
| 237 | 234 | ||
| 238 | bool IsTiled() const { | 235 | bool IsTiled() const { |