diff options
| author | 2019-05-10 04:17:48 -0300 | |
|---|---|---|
| committer | 2019-06-20 21:36:12 -0300 | |
| commit | 345e73f2feb0701e3c3099d002a1c21fb524eae4 (patch) | |
| tree | c8c934dfec804d04a29f8ee27124274f5f999fb8 /src/video_core/texture_cache | |
| parent | texture_cache: Change internal cache from lists to vectors (diff) | |
| download | yuzu-345e73f2feb0701e3c3099d002a1c21fb524eae4.tar.gz yuzu-345e73f2feb0701e3c3099d002a1c21fb524eae4.tar.xz yuzu-345e73f2feb0701e3c3099d002a1c21fb524eae4.zip | |
video_core: Use un-shifted block sizes to avoid integer divisions
Instead of storing all block widths, heights and depths in their shifted
form:
block_width = 1U << block_shift;
Store them as they are provided by the emulated hardware (their
block_shift form). This way we can avoid doing the costly
Common::AlignUp operation to align texture sizes and replace CPU integer
divisions with bitwise logic (defined in Common::AlignBits).
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/surface_base.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_params.cpp | 39 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_params.h | 7 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 3 |
4 files changed, 28 insertions, 24 deletions
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 36ca72b4a..510d1aef5 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -22,7 +22,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) | |||
| 22 | : params{params}, mipmap_sizes(params.num_levels), | 22 | : params{params}, mipmap_sizes(params.num_levels), |
| 23 | mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ | 23 | mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ |
| 24 | params.GetHostSizeInBytes()} { | 24 | params.GetHostSizeInBytes()} { |
| 25 | |||
| 26 | std::size_t offset = 0; | 25 | std::size_t offset = 0; |
| 27 | for (u32 level = 0; level < params.num_levels; ++level) { | 26 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 28 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; | 27 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; |
| @@ -75,7 +74,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, | |||
| 75 | return; | 74 | return; |
| 76 | } | 75 | } |
| 77 | if (params.is_tiled) { | 76 | if (params.is_tiled) { |
| 78 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", | 77 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", |
| 79 | params.block_width, static_cast<u32>(params.target)); | 78 | params.block_width, static_cast<u32>(params.target)); |
| 80 | for (u32 level = 0; level < params.num_levels; ++level) { | 79 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 81 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; | 80 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; |
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index b537b26e2..3a47f404d 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -96,9 +96,9 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( | |||
| 96 | SurfaceParams params; | 96 | SurfaceParams params; |
| 97 | params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | 97 | params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; |
| 98 | params.srgb_conversion = false; | 98 | params.srgb_conversion = false; |
| 99 | params.block_width = 1 << std::min(block_width, 5U); | 99 | params.block_width = std::min(block_width, 5U); |
| 100 | params.block_height = 1 << std::min(block_height, 5U); | 100 | params.block_height = std::min(block_height, 5U); |
| 101 | params.block_depth = 1 << std::min(block_depth, 5U); | 101 | params.block_depth = std::min(block_depth, 5U); |
| 102 | params.tile_width_spacing = 1; | 102 | params.tile_width_spacing = 1; |
| 103 | params.pixel_format = PixelFormatFromDepthFormat(format); | 103 | params.pixel_format = PixelFormatFromDepthFormat(format); |
| 104 | params.component_type = ComponentTypeFromDepthFormat(format); | 104 | params.component_type = ComponentTypeFromDepthFormat(format); |
| @@ -120,9 +120,9 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz | |||
| 120 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | 120 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; |
| 121 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || | 121 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || |
| 122 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; | 122 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; |
| 123 | params.block_width = 1 << config.memory_layout.block_width; | 123 | params.block_width = config.memory_layout.block_width; |
| 124 | params.block_height = 1 << config.memory_layout.block_height; | 124 | params.block_height = config.memory_layout.block_height; |
| 125 | params.block_depth = 1 << config.memory_layout.block_depth; | 125 | params.block_depth = config.memory_layout.block_depth; |
| 126 | params.tile_width_spacing = 1; | 126 | params.tile_width_spacing = 1; |
| 127 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | 127 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); |
| 128 | params.component_type = ComponentTypeFromRenderTarget(config.format); | 128 | params.component_type = ComponentTypeFromRenderTarget(config.format); |
| @@ -149,9 +149,9 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( | |||
| 149 | params.is_tiled = !config.linear; | 149 | params.is_tiled = !config.linear; |
| 150 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || | 150 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || |
| 151 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; | 151 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; |
| 152 | params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, | 152 | params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0, |
| 153 | params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, | 153 | params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0, |
| 154 | params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, | 154 | params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0, |
| 155 | params.tile_width_spacing = 1; | 155 | params.tile_width_spacing = 1; |
| 156 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | 156 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); |
| 157 | params.component_type = ComponentTypeFromRenderTarget(config.format); | 157 | params.component_type = ComponentTypeFromRenderTarget(config.format); |
| @@ -190,9 +190,9 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { | |||
| 190 | const u32 height{GetMipHeight(level)}; | 190 | const u32 height{GetMipHeight(level)}; |
| 191 | const u32 default_block_height{GetDefaultBlockHeight()}; | 191 | const u32 default_block_height{GetDefaultBlockHeight()}; |
| 192 | const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; | 192 | const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; |
| 193 | u32 block_height = 16; | 193 | u32 block_height = 4; |
| 194 | while (block_height > 1 && blocks_in_y <= block_height * 4) { | 194 | while (block_height > 0 && blocks_in_y <= (1U << block_height) * 4) { |
| 195 | block_height >>= 1; | 195 | --block_height; |
| 196 | } | 196 | } |
| 197 | return block_height; | 197 | return block_height; |
| 198 | } | 198 | } |
| @@ -202,17 +202,17 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { | |||
| 202 | return this->block_depth; | 202 | return this->block_depth; |
| 203 | } | 203 | } |
| 204 | if (is_layered) { | 204 | if (is_layered) { |
| 205 | return 1; | 205 | return 0; |
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | const u32 depth{GetMipDepth(level)}; | 208 | const u32 depth{GetMipDepth(level)}; |
| 209 | u32 block_depth = 32; | 209 | u32 block_depth = 5; |
| 210 | while (block_depth > 1 && depth * 2 <= block_depth) { | 210 | while (block_depth > 0 && depth * 2 <= (1U << block_depth)) { |
| 211 | block_depth >>= 1; | 211 | --block_depth; |
| 212 | } | 212 | } |
| 213 | 213 | ||
| 214 | if (block_depth == 32 && GetMipBlockHeight(level) >= 4) { | 214 | if (block_depth == 5 && GetMipBlockHeight(level) >= 2) { |
| 215 | return 16; | 215 | return 4; |
| 216 | } | 216 | } |
| 217 | 217 | ||
| 218 | return block_depth; | 218 | return block_depth; |
| @@ -252,7 +252,8 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co | |||
| 252 | size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); | 252 | size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); |
| 253 | } | 253 | } |
| 254 | if (is_tiled && is_layered) { | 254 | if (is_tiled && is_layered) { |
| 255 | return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); | 255 | return Common::AlignBits(size, |
| 256 | Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); | ||
| 256 | } | 257 | } |
| 257 | return size; | 258 | return size; |
| 258 | } | 259 | } |
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index e0ec1be0e..7c48782c7 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -54,12 +54,12 @@ public: | |||
| 54 | constexpr std::size_t rgb8_bpp = 4ULL; | 54 | constexpr std::size_t rgb8_bpp = 4ULL; |
| 55 | // ASTC is uncompressed in software, in emulated as RGBA8 | 55 | // ASTC is uncompressed in software, in emulated as RGBA8 |
| 56 | host_size_in_bytes = 0; | 56 | host_size_in_bytes = 0; |
| 57 | for (std::size_t level = 0; level < num_levels; level++) { | 57 | for (u32 level = 0; level < num_levels; ++level) { |
| 58 | const std::size_t width = | 58 | const std::size_t width = |
| 59 | Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); | 59 | Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); |
| 60 | const std::size_t height = | 60 | const std::size_t height = |
| 61 | Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); | 61 | Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); |
| 62 | const std::size_t depth = is_layered ? depth : GetMipDepth(level); | 62 | const std::size_t depth = is_layered ? this->depth : GetMipDepth(level); |
| 63 | host_size_in_bytes += width * height * depth * rgb8_bpp; | 63 | host_size_in_bytes += width * height * depth * rgb8_bpp; |
| 64 | } | 64 | } |
| 65 | } else { | 65 | } else { |
| @@ -96,7 +96,8 @@ public: | |||
| 96 | // Helper used for out of class size calculations | 96 | // Helper used for out of class size calculations |
| 97 | static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, | 97 | static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, |
| 98 | const u32 block_depth) { | 98 | const u32 block_depth) { |
| 99 | return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth); | 99 | return Common::AlignBits(out_size, |
| 100 | Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); | ||
| 100 | } | 101 | } |
| 101 | 102 | ||
| 102 | /// Returns the offset in bytes in guest memory of a given mipmap level. | 103 | /// Returns the offset in bytes in guest memory of a given mipmap level. |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1c2b63dae..f35d0c88f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -81,6 +81,9 @@ public: | |||
| 81 | if (!gpu_addr) { | 81 | if (!gpu_addr) { |
| 82 | return {}; | 82 | return {}; |
| 83 | } | 83 | } |
| 84 | if (gpu_addr == 0x1b7ec0000) { | ||
| 85 | // __debugbreak(); | ||
| 86 | } | ||
| 84 | const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; | 87 | const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; |
| 85 | return GetSurface(gpu_addr, params, true).second; | 88 | return GetSurface(gpu_addr, params, true).second; |
| 86 | } | 89 | } |