diff options
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/decode_bc.cpp | 129 | ||||
| -rw-r--r-- | src/video_core/texture_cache/decode_bc.h (renamed from src/video_core/texture_cache/decode_bc4.h) | 6 | ||||
| -rw-r--r-- | src/video_core/texture_cache/decode_bc4.cpp | 96 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 5 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 53 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 14 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 72 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.h | 31 |
8 files changed, 226 insertions, 180 deletions
diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp new file mode 100644 index 000000000..3e26474a3 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc.cpp | |||
| @@ -0,0 +1,129 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <algorithm> | ||
| 5 | #include <array> | ||
| 6 | #include <span> | ||
| 7 | #include <bc_decoder.h> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/decode_bc.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | namespace { | ||
| 15 | constexpr u32 BLOCK_SIZE = 4; | ||
| 16 | |||
| 17 | using VideoCore::Surface::PixelFormat; | ||
| 18 | |||
| 19 | constexpr bool IsSigned(PixelFormat pixel_format) { | ||
| 20 | switch (pixel_format) { | ||
| 21 | case PixelFormat::BC4_SNORM: | ||
| 22 | case PixelFormat::BC4_UNORM: | ||
| 23 | case PixelFormat::BC5_SNORM: | ||
| 24 | case PixelFormat::BC5_UNORM: | ||
| 25 | case PixelFormat::BC6H_SFLOAT: | ||
| 26 | case PixelFormat::BC6H_UFLOAT: | ||
| 27 | return true; | ||
| 28 | default: | ||
| 29 | return false; | ||
| 30 | } | ||
| 31 | } | ||
| 32 | |||
| 33 | constexpr u32 BlockSize(PixelFormat pixel_format) { | ||
| 34 | switch (pixel_format) { | ||
| 35 | case PixelFormat::BC1_RGBA_SRGB: | ||
| 36 | case PixelFormat::BC1_RGBA_UNORM: | ||
| 37 | case PixelFormat::BC4_SNORM: | ||
| 38 | case PixelFormat::BC4_UNORM: | ||
| 39 | return 8; | ||
| 40 | default: | ||
| 41 | return 16; | ||
| 42 | } | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) { | ||
| 47 | switch (pixel_format) { | ||
| 48 | case PixelFormat::BC4_SNORM: | ||
| 49 | case PixelFormat::BC4_UNORM: | ||
| 50 | return 1; | ||
| 51 | case PixelFormat::BC5_SNORM: | ||
| 52 | case PixelFormat::BC5_UNORM: | ||
| 53 | return 2; | ||
| 54 | case PixelFormat::BC6H_SFLOAT: | ||
| 55 | case PixelFormat::BC6H_UFLOAT: | ||
| 56 | return 8; | ||
| 57 | default: | ||
| 58 | return 4; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | template <auto decompress, PixelFormat pixel_format> | ||
| 63 | void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent, | ||
| 64 | bool is_signed = false) { | ||
| 65 | const u32 out_bpp = ConvertedBytesPerBlock(pixel_format); | ||
| 66 | const u32 block_width = std::min(extent.width, BLOCK_SIZE); | ||
| 67 | const u32 block_height = std::min(extent.height, BLOCK_SIZE); | ||
| 68 | const u32 pitch = extent.width * out_bpp; | ||
| 69 | size_t input_offset = 0; | ||
| 70 | size_t output_offset = 0; | ||
| 71 | for (u32 slice = 0; slice < extent.depth; ++slice) { | ||
| 72 | for (u32 y = 0; y < extent.height; y += block_height) { | ||
| 73 | size_t row_offset = 0; | ||
| 74 | for (u32 x = 0; x < extent.width; | ||
| 75 | x += block_width, row_offset += block_width * out_bpp) { | ||
| 76 | const u8* src = input.data() + input_offset; | ||
| 77 | u8* const dst = output.data() + output_offset + row_offset; | ||
| 78 | if constexpr (IsSigned(pixel_format)) { | ||
| 79 | decompress(src, dst, x, y, extent.width, extent.height, is_signed); | ||
| 80 | } else { | ||
| 81 | decompress(src, dst, x, y, extent.width, extent.height); | ||
| 82 | } | ||
| 83 | input_offset += BlockSize(pixel_format); | ||
| 84 | } | ||
| 85 | output_offset += block_height * pitch; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | } | ||
| 89 | |||
| 90 | void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, | ||
| 91 | VideoCore::Surface::PixelFormat pixel_format) { | ||
| 92 | switch (pixel_format) { | ||
| 93 | case PixelFormat::BC1_RGBA_UNORM: | ||
| 94 | case PixelFormat::BC1_RGBA_SRGB: | ||
| 95 | DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent); | ||
| 96 | break; | ||
| 97 | case PixelFormat::BC2_UNORM: | ||
| 98 | case PixelFormat::BC2_SRGB: | ||
| 99 | DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent); | ||
| 100 | break; | ||
| 101 | case PixelFormat::BC3_UNORM: | ||
| 102 | case PixelFormat::BC3_SRGB: | ||
| 103 | DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent); | ||
| 104 | break; | ||
| 105 | case PixelFormat::BC4_SNORM: | ||
| 106 | case PixelFormat::BC4_UNORM: | ||
| 107 | DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>( | ||
| 108 | input, output, extent, pixel_format == PixelFormat::BC4_SNORM); | ||
| 109 | break; | ||
| 110 | case PixelFormat::BC5_SNORM: | ||
| 111 | case PixelFormat::BC5_UNORM: | ||
| 112 | DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>( | ||
| 113 | input, output, extent, pixel_format == PixelFormat::BC5_SNORM); | ||
| 114 | break; | ||
| 115 | case PixelFormat::BC6H_SFLOAT: | ||
| 116 | case PixelFormat::BC6H_UFLOAT: | ||
| 117 | DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>( | ||
| 118 | input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT); | ||
| 119 | break; | ||
| 120 | case PixelFormat::BC7_SRGB: | ||
| 121 | case PixelFormat::BC7_UNORM: | ||
| 122 | DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent); | ||
| 123 | break; | ||
| 124 | default: | ||
| 125 | LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc.h index ab2f735be..41d1ec0a3 100644 --- a/src/video_core/texture_cache/decode_bc4.h +++ b/src/video_core/texture_cache/decode_bc.h | |||
| @@ -6,10 +6,14 @@ | |||
| 6 | #include <span> | 6 | #include <span> |
| 7 | 7 | ||
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "video_core/surface.h" | ||
| 9 | #include "video_core/texture_cache/types.h" | 10 | #include "video_core/texture_cache/types.h" |
| 10 | 11 | ||
| 11 | namespace VideoCommon { | 12 | namespace VideoCommon { |
| 12 | 13 | ||
| 13 | void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output); | 14 | [[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format); |
| 15 | |||
| 16 | void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, | ||
| 17 | VideoCore::Surface::PixelFormat pixel_format); | ||
| 14 | 18 | ||
| 15 | } // namespace VideoCommon | 19 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp deleted file mode 100644 index ef98afdca..000000000 --- a/src/video_core/texture_cache/decode_bc4.cpp +++ /dev/null | |||
| @@ -1,96 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <algorithm> | ||
| 5 | #include <array> | ||
| 6 | #include <span> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/decode_bc4.h" | ||
| 11 | #include "video_core/texture_cache/types.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt | ||
| 16 | [[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) { | ||
| 17 | const u32 code_offset = 16 + 3 * (4 * y + x); | ||
| 18 | const u32 code = (bits >> code_offset) & 7; | ||
| 19 | const u32 red0 = (bits >> 0) & 0xff; | ||
| 20 | const u32 red1 = (bits >> 8) & 0xff; | ||
| 21 | if (red0 > red1) { | ||
| 22 | switch (code) { | ||
| 23 | case 0: | ||
| 24 | return red0; | ||
| 25 | case 1: | ||
| 26 | return red1; | ||
| 27 | case 2: | ||
| 28 | return (6 * red0 + 1 * red1) / 7; | ||
| 29 | case 3: | ||
| 30 | return (5 * red0 + 2 * red1) / 7; | ||
| 31 | case 4: | ||
| 32 | return (4 * red0 + 3 * red1) / 7; | ||
| 33 | case 5: | ||
| 34 | return (3 * red0 + 4 * red1) / 7; | ||
| 35 | case 6: | ||
| 36 | return (2 * red0 + 5 * red1) / 7; | ||
| 37 | case 7: | ||
| 38 | return (1 * red0 + 6 * red1) / 7; | ||
| 39 | } | ||
| 40 | } else { | ||
| 41 | switch (code) { | ||
| 42 | case 0: | ||
| 43 | return red0; | ||
| 44 | case 1: | ||
| 45 | return red1; | ||
| 46 | case 2: | ||
| 47 | return (4 * red0 + 1 * red1) / 5; | ||
| 48 | case 3: | ||
| 49 | return (3 * red0 + 2 * red1) / 5; | ||
| 50 | case 4: | ||
| 51 | return (2 * red0 + 3 * red1) / 5; | ||
| 52 | case 5: | ||
| 53 | return (1 * red0 + 4 * red1) / 5; | ||
| 54 | case 6: | ||
| 55 | return 0; | ||
| 56 | case 7: | ||
| 57 | return 0xff; | ||
| 58 | } | ||
| 59 | } | ||
| 60 | return 0; | ||
| 61 | } | ||
| 62 | |||
| 63 | void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) { | ||
| 64 | UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width); | ||
| 65 | UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height); | ||
| 66 | static constexpr u32 BLOCK_SIZE = 4; | ||
| 67 | size_t input_offset = 0; | ||
| 68 | for (u32 slice = 0; slice < extent.depth; ++slice) { | ||
| 69 | for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) { | ||
| 70 | for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) { | ||
| 71 | u64 bits; | ||
| 72 | std::memcpy(&bits, &input[input_offset], sizeof(bits)); | ||
| 73 | input_offset += sizeof(bits); | ||
| 74 | |||
| 75 | for (u32 y = 0; y < BLOCK_SIZE; ++y) { | ||
| 76 | for (u32 x = 0; x < BLOCK_SIZE; ++x) { | ||
| 77 | const u32 linear_z = slice; | ||
| 78 | const u32 linear_y = block_y * BLOCK_SIZE + y; | ||
| 79 | const u32 linear_x = block_x * BLOCK_SIZE + x; | ||
| 80 | const u32 offset_z = linear_z * extent.width * extent.height; | ||
| 81 | const u32 offset_y = linear_y * extent.width; | ||
| 82 | const u32 offset_x = linear_x; | ||
| 83 | const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL; | ||
| 84 | const u32 color = DecompressBlock(bits, x, y); | ||
| 85 | output[output_offset + 0] = static_cast<u8>(color); | ||
| 86 | output[output_offset + 1] = 0; | ||
| 87 | output[output_offset + 2] = 0; | ||
| 88 | output[output_offset + 3] = 0xff; | ||
| 89 | } | ||
| 90 | } | ||
| 91 | } | ||
| 92 | } | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 1b8a17ee8..55d49d017 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <optional> | 7 | #include <optional> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | #include <boost/container/small_vector.hpp> | ||
| 9 | 10 | ||
| 10 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -108,8 +109,8 @@ struct ImageBase { | |||
| 108 | std::vector<ImageViewInfo> image_view_infos; | 109 | std::vector<ImageViewInfo> image_view_infos; |
| 109 | std::vector<ImageViewId> image_view_ids; | 110 | std::vector<ImageViewId> image_view_ids; |
| 110 | 111 | ||
| 111 | std::vector<u32> slice_offsets; | 112 | boost::container::small_vector<u32, 16> slice_offsets; |
| 112 | std::vector<SubresourceBase> slice_subresources; | 113 | boost::container::small_vector<SubresourceBase, 16> slice_subresources; |
| 113 | 114 | ||
| 114 | std::vector<AliasedImage> aliased_images; | 115 | std::vector<AliasedImage> aliased_images; |
| 115 | std::vector<ImageId> overlapping_images; | 116 | std::vector<ImageId> overlapping_images; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4027d860b..8190f3ba1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -186,6 +186,10 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { | |||
| 186 | 186 | ||
| 187 | template <class P> | 187 | template <class P> |
| 188 | void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { | 188 | void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { |
| 189 | if (!Settings::values.barrier_feedback_loops.GetValue()) { | ||
| 190 | return; | ||
| 191 | } | ||
| 192 | |||
| 189 | const bool requires_barrier = [&] { | 193 | const bool requires_barrier = [&] { |
| 190 | for (const auto& view : views) { | 194 | for (const auto& view : views) { |
| 191 | if (!view.id) { | 195 | if (!view.id) { |
| @@ -300,7 +304,7 @@ void TextureCache<P>::SynchronizeComputeDescriptors() { | |||
| 300 | } | 304 | } |
| 301 | 305 | ||
| 302 | template <class P> | 306 | template <class P> |
| 303 | bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { | 307 | bool TextureCache<P>::RescaleRenderTargets() { |
| 304 | auto& flags = maxwell3d->dirty.flags; | 308 | auto& flags = maxwell3d->dirty.flags; |
| 305 | u32 scale_rating = 0; | 309 | u32 scale_rating = 0; |
| 306 | bool rescaled = false; | 310 | bool rescaled = false; |
| @@ -338,13 +342,13 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { | |||
| 338 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | 342 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; |
| 339 | if (flags[Dirty::ColorBuffer0 + index] || force) { | 343 | if (flags[Dirty::ColorBuffer0 + index] || force) { |
| 340 | flags[Dirty::ColorBuffer0 + index] = false; | 344 | flags[Dirty::ColorBuffer0 + index] = false; |
| 341 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); | 345 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index)); |
| 342 | } | 346 | } |
| 343 | check_rescale(color_buffer_id, tmp_color_images[index]); | 347 | check_rescale(color_buffer_id, tmp_color_images[index]); |
| 344 | } | 348 | } |
| 345 | if (flags[Dirty::ZetaBuffer] || force) { | 349 | if (flags[Dirty::ZetaBuffer] || force) { |
| 346 | flags[Dirty::ZetaBuffer] = false; | 350 | flags[Dirty::ZetaBuffer] = false; |
| 347 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); | 351 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer()); |
| 348 | } | 352 | } |
| 349 | check_rescale(render_targets.depth_buffer_id, tmp_depth_image); | 353 | check_rescale(render_targets.depth_buffer_id, tmp_depth_image); |
| 350 | 354 | ||
| @@ -409,7 +413,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { | |||
| 409 | return; | 413 | return; |
| 410 | } | 414 | } |
| 411 | 415 | ||
| 412 | const bool rescaled = RescaleRenderTargets(is_clear); | 416 | const bool rescaled = RescaleRenderTargets(); |
| 413 | if (is_rescaling != rescaled) { | 417 | if (is_rescaling != rescaled) { |
| 414 | flags[Dirty::RescaleViewports] = true; | 418 | flags[Dirty::RescaleViewports] = true; |
| 415 | flags[Dirty::RescaleScissors] = true; | 419 | flags[Dirty::RescaleScissors] = true; |
| @@ -522,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | |||
| 522 | 526 | ||
| 523 | template <class P> | 527 | template <class P> |
| 524 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | 528 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { |
| 525 | std::vector<ImageId> images; | 529 | boost::container::small_vector<ImageId, 16> images; |
| 526 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { | 530 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { |
| 527 | if (!image.IsSafeDownload()) { | 531 | if (!image.IsSafeDownload()) { |
| 528 | return; | 532 | return; |
| @@ -575,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V | |||
| 575 | 579 | ||
| 576 | template <class P> | 580 | template <class P> |
| 577 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | 581 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { |
| 578 | std::vector<ImageId> deleted_images; | 582 | boost::container::small_vector<ImageId, 16> deleted_images; |
| 579 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); | 583 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 580 | for (const ImageId id : deleted_images) { | 584 | for (const ImageId id : deleted_images) { |
| 581 | Image& image = slot_images[id]; | 585 | Image& image = slot_images[id]; |
| @@ -589,19 +593,11 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | |||
| 589 | 593 | ||
| 590 | template <class P> | 594 | template <class P> |
| 591 | void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { | 595 | void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { |
| 592 | std::vector<ImageId> deleted_images; | 596 | boost::container::small_vector<ImageId, 16> deleted_images; |
| 593 | ForEachImageInRegionGPU(as_id, gpu_addr, size, | 597 | ForEachImageInRegionGPU(as_id, gpu_addr, size, |
| 594 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); | 598 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 595 | for (const ImageId id : deleted_images) { | 599 | for (const ImageId id : deleted_images) { |
| 596 | Image& image = slot_images[id]; | 600 | Image& image = slot_images[id]; |
| 597 | if (True(image.flags & ImageFlagBits::CpuModified)) { | ||
| 598 | return; | ||
| 599 | } | ||
| 600 | image.flags |= ImageFlagBits::CpuModified; | ||
| 601 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 602 | UntrackImage(image, id); | ||
| 603 | } | ||
| 604 | /* | ||
| 605 | if (True(image.flags & ImageFlagBits::Remapped)) { | 601 | if (True(image.flags & ImageFlagBits::Remapped)) { |
| 606 | continue; | 602 | continue; |
| 607 | } | 603 | } |
| @@ -609,7 +605,6 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz | |||
| 609 | if (True(image.flags & ImageFlagBits::Tracked)) { | 605 | if (True(image.flags & ImageFlagBits::Tracked)) { |
| 610 | UntrackImage(image, id); | 606 | UntrackImage(image, id); |
| 611 | } | 607 | } |
| 612 | */ | ||
| 613 | } | 608 | } |
| 614 | } | 609 | } |
| 615 | 610 | ||
| @@ -875,6 +870,10 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo | |||
| 875 | return NULL_IMAGE_ID; | 870 | return NULL_IMAGE_ID; |
| 876 | } | 871 | } |
| 877 | auto& image = slot_images[image_id]; | 872 | auto& image = slot_images[image_id]; |
| 873 | if (image.info.type == ImageType::e3D) { | ||
| 874 | // Don't accelerate 3D images. | ||
| 875 | return NULL_IMAGE_ID; | ||
| 876 | } | ||
| 878 | if (!is_upload && !image.info.dma_downloaded) { | 877 | if (!is_upload && !image.info.dma_downloaded) { |
| 879 | // Force a full sync. | 878 | // Force a full sync. |
| 880 | image.info.dma_downloaded = true; | 879 | image.info.dma_downloaded = true; |
| @@ -1097,7 +1096,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||
| 1097 | const bool native_bgr = runtime.HasNativeBgr(); | 1096 | const bool native_bgr = runtime.HasNativeBgr(); |
| 1098 | const bool flexible_formats = True(options & RelaxedOptions::Format); | 1097 | const bool flexible_formats = True(options & RelaxedOptions::Format); |
| 1099 | ImageId image_id{}; | 1098 | ImageId image_id{}; |
| 1100 | boost::container::small_vector<ImageId, 1> image_ids; | 1099 | boost::container::small_vector<ImageId, 8> image_ids; |
| 1101 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | 1100 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1102 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | 1101 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { |
| 1103 | return false; | 1102 | return false; |
| @@ -1618,7 +1617,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) | |||
| 1618 | } | 1617 | } |
| 1619 | } | 1618 | } |
| 1620 | ImageId image_id{}; | 1619 | ImageId image_id{}; |
| 1621 | boost::container::small_vector<ImageId, 1> image_ids; | 1620 | boost::container::small_vector<ImageId, 8> image_ids; |
| 1622 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | 1621 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1623 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | 1622 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { |
| 1624 | return false; | 1623 | return false; |
| @@ -1678,7 +1677,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | |||
| 1678 | } | 1677 | } |
| 1679 | 1678 | ||
| 1680 | template <class P> | 1679 | template <class P> |
| 1681 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { | 1680 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index) { |
| 1682 | const auto& regs = maxwell3d->regs; | 1681 | const auto& regs = maxwell3d->regs; |
| 1683 | if (index >= regs.rt_control.count) { | 1682 | if (index >= regs.rt_control.count) { |
| 1684 | return ImageViewId{}; | 1683 | return ImageViewId{}; |
| @@ -1692,11 +1691,11 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { | |||
| 1692 | return ImageViewId{}; | 1691 | return ImageViewId{}; |
| 1693 | } | 1692 | } |
| 1694 | const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode); | 1693 | const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode); |
| 1695 | return FindRenderTargetView(info, gpu_addr, is_clear); | 1694 | return FindRenderTargetView(info, gpu_addr); |
| 1696 | } | 1695 | } |
| 1697 | 1696 | ||
| 1698 | template <class P> | 1697 | template <class P> |
| 1699 | ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { | 1698 | ImageViewId TextureCache<P>::FindDepthBuffer() { |
| 1700 | const auto& regs = maxwell3d->regs; | 1699 | const auto& regs = maxwell3d->regs; |
| 1701 | if (!regs.zeta_enable) { | 1700 | if (!regs.zeta_enable) { |
| 1702 | return ImageViewId{}; | 1701 | return ImageViewId{}; |
| @@ -1706,18 +1705,16 @@ ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { | |||
| 1706 | return ImageViewId{}; | 1705 | return ImageViewId{}; |
| 1707 | } | 1706 | } |
| 1708 | const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode); | 1707 | const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode); |
| 1709 | return FindRenderTargetView(info, gpu_addr, is_clear); | 1708 | return FindRenderTargetView(info, gpu_addr); |
| 1710 | } | 1709 | } |
| 1711 | 1710 | ||
| 1712 | template <class P> | 1711 | template <class P> |
| 1713 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, | 1712 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr) { |
| 1714 | bool is_clear) { | ||
| 1715 | const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; | ||
| 1716 | ImageId image_id{}; | 1713 | ImageId image_id{}; |
| 1717 | bool delete_state = has_deleted_images; | 1714 | bool delete_state = has_deleted_images; |
| 1718 | do { | 1715 | do { |
| 1719 | has_deleted_images = false; | 1716 | has_deleted_images = false; |
| 1720 | image_id = FindOrInsertImage(info, gpu_addr, options); | 1717 | image_id = FindOrInsertImage(info, gpu_addr); |
| 1721 | delete_state |= has_deleted_images; | 1718 | delete_state |= has_deleted_images; |
| 1722 | } while (has_deleted_images); | 1719 | } while (has_deleted_images); |
| 1723 | has_deleted_images = delete_state; | 1720 | has_deleted_images = delete_state; |
| @@ -1940,7 +1937,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1940 | image.map_view_id = map_id; | 1937 | image.map_view_id = map_id; |
| 1941 | return; | 1938 | return; |
| 1942 | } | 1939 | } |
| 1943 | std::vector<ImageViewId> sparse_maps{}; | 1940 | boost::container::small_vector<ImageViewId, 16> sparse_maps; |
| 1944 | ForEachSparseSegment( | 1941 | ForEachSparseSegment( |
| 1945 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 1942 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { |
| 1946 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | 1943 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); |
| @@ -2215,7 +2212,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept { | |||
| 2215 | 2212 | ||
| 2216 | template <class P> | 2213 | template <class P> |
| 2217 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | 2214 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { |
| 2218 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | 2215 | boost::container::small_vector<const AliasedImage*, 8> aliased_images; |
| 2219 | Image& image = slot_images[image_id]; | 2216 | Image& image = slot_images[image_id]; |
| 2220 | bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); | 2217 | bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); |
| 2221 | bool any_modified = True(image.flags & ImageFlagBits::GpuModified); | 2218 | bool any_modified = True(image.flags & ImageFlagBits::GpuModified); |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index d96ddea9d..e9ec91265 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -56,7 +56,7 @@ struct ImageViewInOut { | |||
| 56 | struct AsyncDecodeContext { | 56 | struct AsyncDecodeContext { |
| 57 | ImageId image_id; | 57 | ImageId image_id; |
| 58 | Common::ScratchBuffer<u8> decoded_data; | 58 | Common::ScratchBuffer<u8> decoded_data; |
| 59 | std::vector<BufferImageCopy> copies; | 59 | boost::container::small_vector<BufferImageCopy, 16> copies; |
| 60 | std::mutex mutex; | 60 | std::mutex mutex; |
| 61 | std::atomic_bool complete; | 61 | std::atomic_bool complete; |
| 62 | }; | 62 | }; |
| @@ -178,9 +178,8 @@ public: | |||
| 178 | void SynchronizeComputeDescriptors(); | 178 | void SynchronizeComputeDescriptors(); |
| 179 | 179 | ||
| 180 | /// Updates the Render Targets if they can be rescaled | 180 | /// Updates the Render Targets if they can be rescaled |
| 181 | /// @param is_clear True when the render targets are being used for clears | ||
| 182 | /// @retval True if the Render Targets have been rescaled. | 181 | /// @retval True if the Render Targets have been rescaled. |
| 183 | bool RescaleRenderTargets(bool is_clear); | 182 | bool RescaleRenderTargets(); |
| 184 | 183 | ||
| 185 | /// Update bound render targets and upload memory if necessary | 184 | /// Update bound render targets and upload memory if necessary |
| 186 | /// @param is_clear True when the render targets are being used for clears | 185 | /// @param is_clear True when the render targets are being used for clears |
| @@ -336,14 +335,13 @@ private: | |||
| 336 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); | 335 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); |
| 337 | 336 | ||
| 338 | /// Find or create an image view for the given color buffer index | 337 | /// Find or create an image view for the given color buffer index |
| 339 | [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); | 338 | [[nodiscard]] ImageViewId FindColorBuffer(size_t index); |
| 340 | 339 | ||
| 341 | /// Find or create an image view for the depth buffer | 340 | /// Find or create an image view for the depth buffer |
| 342 | [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); | 341 | [[nodiscard]] ImageViewId FindDepthBuffer(); |
| 343 | 342 | ||
| 344 | /// Find or create a view for a render target with the given image parameters | 343 | /// Find or create a view for a render target with the given image parameters |
| 345 | [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, | 344 | [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr); |
| 346 | bool is_clear); | ||
| 347 | 345 | ||
| 348 | /// Iterates over all the images in a region calling func | 346 | /// Iterates over all the images in a region calling func |
| 349 | template <typename Func> | 347 | template <typename Func> |
| @@ -431,7 +429,7 @@ private: | |||
| 431 | 429 | ||
| 432 | std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; | 430 | std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; |
| 433 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; | 431 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; |
| 434 | std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; | 432 | std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; |
| 435 | 433 | ||
| 436 | VAddr virtual_invalid_space{}; | 434 | VAddr virtual_invalid_space{}; |
| 437 | 435 | ||
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 95a5b47d8..9a618a57a 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -24,7 +24,7 @@ | |||
| 24 | #include "video_core/engines/maxwell_3d.h" | 24 | #include "video_core/engines/maxwell_3d.h" |
| 25 | #include "video_core/memory_manager.h" | 25 | #include "video_core/memory_manager.h" |
| 26 | #include "video_core/surface.h" | 26 | #include "video_core/surface.h" |
| 27 | #include "video_core/texture_cache/decode_bc4.h" | 27 | #include "video_core/texture_cache/decode_bc.h" |
| 28 | #include "video_core/texture_cache/format_lookup_table.h" | 28 | #include "video_core/texture_cache/format_lookup_table.h" |
| 29 | #include "video_core/texture_cache/formatter.h" | 29 | #include "video_core/texture_cache/formatter.h" |
| 30 | #include "video_core/texture_cache/samples_helper.h" | 30 | #include "video_core/texture_cache/samples_helper.h" |
| @@ -61,8 +61,6 @@ using VideoCore::Surface::PixelFormatFromDepthFormat; | |||
| 61 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | 61 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
| 62 | using VideoCore::Surface::SurfaceType; | 62 | using VideoCore::Surface::SurfaceType; |
| 63 | 63 | ||
| 64 | constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 65 | |||
| 66 | struct LevelInfo { | 64 | struct LevelInfo { |
| 67 | Extent3D size; | 65 | Extent3D size; |
| 68 | Extent3D block; | 66 | Extent3D block; |
| @@ -329,13 +327,13 @@ template <u32 GOB_EXTENT> | |||
| 329 | 327 | ||
| 330 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( | 328 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( |
| 331 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { | 329 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { |
| 332 | const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); | 330 | const auto slice_offsets = CalculateSliceOffsets(new_info); |
| 333 | const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); | 331 | const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); |
| 334 | const auto it = std::ranges::find(slice_offsets, diff); | 332 | const auto it = std::ranges::find(slice_offsets, diff); |
| 335 | if (it == slice_offsets.end()) { | 333 | if (it == slice_offsets.end()) { |
| 336 | return std::nullopt; | 334 | return std::nullopt; |
| 337 | } | 335 | } |
| 338 | const std::vector subresources = CalculateSliceSubresources(new_info); | 336 | const auto subresources = CalculateSliceSubresources(new_info); |
| 339 | const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; | 337 | const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; |
| 340 | const ImageInfo& info = overlap.info; | 338 | const ImageInfo& info = overlap.info; |
| 341 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { | 339 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { |
| @@ -612,7 +610,8 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { | |||
| 612 | } | 610 | } |
| 613 | return output_size; | 611 | return output_size; |
| 614 | } | 612 | } |
| 615 | return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; | 613 | return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * |
| 614 | ConvertedBytesPerBlock(info.format); | ||
| 616 | } | 615 | } |
| 617 | 616 | ||
| 618 | u32 CalculateLayerStride(const ImageInfo& info) noexcept { | 617 | u32 CalculateLayerStride(const ImageInfo& info) noexcept { |
| @@ -655,9 +654,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { | |||
| 655 | return sizes; | 654 | return sizes; |
| 656 | } | 655 | } |
| 657 | 656 | ||
| 658 | std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | 657 | boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) { |
| 659 | ASSERT(info.type == ImageType::e3D); | 658 | ASSERT(info.type == ImageType::e3D); |
| 660 | std::vector<u32> offsets; | 659 | boost::container::small_vector<u32, 16> offsets; |
| 661 | offsets.reserve(NumSlices(info)); | 660 | offsets.reserve(NumSlices(info)); |
| 662 | 661 | ||
| 663 | const LevelInfo level_info = MakeLevelInfo(info); | 662 | const LevelInfo level_info = MakeLevelInfo(info); |
| @@ -679,9 +678,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | |||
| 679 | return offsets; | 678 | return offsets; |
| 680 | } | 679 | } |
| 681 | 680 | ||
| 682 | std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { | 681 | boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( |
| 682 | const ImageInfo& info) { | ||
| 683 | ASSERT(info.type == ImageType::e3D); | 683 | ASSERT(info.type == ImageType::e3D); |
| 684 | std::vector<SubresourceBase> subresources; | 684 | boost::container::small_vector<SubresourceBase, 16> subresources; |
| 685 | subresources.reserve(NumSlices(info)); | 685 | subresources.reserve(NumSlices(info)); |
| 686 | for (s32 level = 0; level < info.resources.levels; ++level) { | 686 | for (s32 level = 0; level < info.resources.levels; ++level) { |
| 687 | const s32 depth = AdjustMipSize(info.size.depth, level); | 687 | const s32 depth = AdjustMipSize(info.size.depth, level); |
| @@ -723,8 +723,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { | |||
| 723 | } | 723 | } |
| 724 | } | 724 | } |
| 725 | 725 | ||
| 726 | std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, | 726 | boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst, |
| 727 | SubresourceBase base, u32 up_scale, u32 down_shift) { | 727 | const ImageInfo& src, |
| 728 | SubresourceBase base, | ||
| 729 | u32 up_scale, u32 down_shift) { | ||
| 728 | ASSERT(dst.resources.levels >= src.resources.levels); | 730 | ASSERT(dst.resources.levels >= src.resources.levels); |
| 729 | 731 | ||
| 730 | const bool is_dst_3d = dst.type == ImageType::e3D; | 732 | const bool is_dst_3d = dst.type == ImageType::e3D; |
| @@ -733,7 +735,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 733 | ASSERT(src.resources.levels == 1); | 735 | ASSERT(src.resources.levels == 1); |
| 734 | } | 736 | } |
| 735 | const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; | 737 | const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; |
| 736 | std::vector<ImageCopy> copies; | 738 | boost::container::small_vector<ImageCopy, 16> copies; |
| 737 | copies.reserve(src.resources.levels); | 739 | copies.reserve(src.resources.levels); |
| 738 | for (s32 level = 0; level < src.resources.levels; ++level) { | 740 | for (s32 level = 0; level < src.resources.levels; ++level) { |
| 739 | ImageCopy& copy = copies.emplace_back(); | 741 | ImageCopy& copy = copies.emplace_back(); |
| @@ -770,9 +772,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 770 | return copies; | 772 | return copies; |
| 771 | } | 773 | } |
| 772 | 774 | ||
| 773 | std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, | 775 | boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src, |
| 774 | u32 down_shift) { | 776 | u32 up_scale, |
| 775 | std::vector<ImageCopy> copies; | 777 | u32 down_shift) { |
| 778 | boost::container::small_vector<ImageCopy, 16> copies; | ||
| 776 | copies.reserve(src.resources.levels); | 779 | copies.reserve(src.resources.levels); |
| 777 | const bool is_3d = src.type == ImageType::e3D; | 780 | const bool is_3d = src.type == ImageType::e3D; |
| 778 | for (s32 level = 0; level < src.resources.levels; ++level) { | 781 | for (s32 level = 0; level < src.resources.levels; ++level) { |
| @@ -824,9 +827,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config | |||
| 824 | return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); | 827 | return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); |
| 825 | } | 828 | } |
| 826 | 829 | ||
| 827 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 830 | boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory, |
| 828 | const ImageInfo& info, std::span<const u8> input, | 831 | GPUVAddr gpu_addr, |
| 829 | std::span<u8> output) { | 832 | const ImageInfo& info, |
| 833 | std::span<const u8> input, | ||
| 834 | std::span<u8> output) { | ||
| 830 | const size_t guest_size_bytes = input.size_bytes(); | 835 | const size_t guest_size_bytes = input.size_bytes(); |
| 831 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | 836 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); |
| 832 | const Extent3D size = info.size; | 837 | const Extent3D size = info.size; |
| @@ -861,7 +866,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP | |||
| 861 | info.tile_width_spacing); | 866 | info.tile_width_spacing); |
| 862 | size_t guest_offset = 0; | 867 | size_t guest_offset = 0; |
| 863 | u32 host_offset = 0; | 868 | u32 host_offset = 0; |
| 864 | std::vector<BufferImageCopy> copies(num_levels); | 869 | boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); |
| 865 | 870 | ||
| 866 | for (s32 level = 0; level < num_levels; ++level) { | 871 | for (s32 level = 0; level < num_levels; ++level) { |
| 867 | const Extent3D level_size = AdjustMipSize(size, level); | 872 | const Extent3D level_size = AdjustMipSize(size, level); |
| @@ -939,7 +944,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 939 | tile_size.height, output.subspan(output_offset)); | 944 | tile_size.height, output.subspan(output_offset)); |
| 940 | 945 | ||
| 941 | output_offset += copy.image_extent.width * copy.image_extent.height * | 946 | output_offset += copy.image_extent.width * copy.image_extent.height * |
| 942 | copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; | 947 | copy.image_subresource.num_layers * |
| 948 | BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 943 | } else if (astc) { | 949 | } else if (astc) { |
| 944 | // BC1 uses 0.5 bytes per texel | 950 | // BC1 uses 0.5 bytes per texel |
| 945 | // BC3 uses 1 byte per texel | 951 | // BC3 uses 1 byte per texel |
| @@ -950,7 +956,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 950 | 956 | ||
| 951 | const u32 plane_dim = copy.image_extent.width * copy.image_extent.height; | 957 | const u32 plane_dim = copy.image_extent.width * copy.image_extent.height; |
| 952 | const u32 level_size = plane_dim * copy.image_extent.depth * | 958 | const u32 level_size = plane_dim * copy.image_extent.depth * |
| 953 | copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; | 959 | copy.image_subresource.num_layers * |
| 960 | BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 954 | decode_scratch.resize_destructive(level_size); | 961 | decode_scratch.resize_destructive(level_size); |
| 955 | 962 | ||
| 956 | Tegra::Texture::ASTC::Decompress( | 963 | Tegra::Texture::ASTC::Decompress( |
| @@ -970,15 +977,20 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 970 | bpp_div; | 977 | bpp_div; |
| 971 | output_offset += static_cast<u32>(copy.buffer_size); | 978 | output_offset += static_cast<u32>(copy.buffer_size); |
| 972 | } else { | 979 | } else { |
| 973 | DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset)); | 980 | const Extent3D image_extent{ |
| 974 | 981 | .width = copy.image_extent.width, | |
| 982 | .height = copy.image_extent.height * copy.image_subresource.num_layers, | ||
| 983 | .depth = copy.image_extent.depth, | ||
| 984 | }; | ||
| 985 | DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format); | ||
| 975 | output_offset += copy.image_extent.width * copy.image_extent.height * | 986 | output_offset += copy.image_extent.width * copy.image_extent.height * |
| 976 | copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; | 987 | copy.image_subresource.num_layers * |
| 988 | ConvertedBytesPerBlock(info.format); | ||
| 977 | } | 989 | } |
| 978 | } | 990 | } |
| 979 | } | 991 | } |
| 980 | 992 | ||
| 981 | std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { | 993 | boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) { |
| 982 | const Extent3D size = info.size; | 994 | const Extent3D size = info.size; |
| 983 | const u32 bytes_per_block = BytesPerBlock(info.format); | 995 | const u32 bytes_per_block = BytesPerBlock(info.format); |
| 984 | if (info.type == ImageType::Linear) { | 996 | if (info.type == ImageType::Linear) { |
| @@ -1006,7 +1018,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { | |||
| 1006 | 1018 | ||
| 1007 | u32 host_offset = 0; | 1019 | u32 host_offset = 0; |
| 1008 | 1020 | ||
| 1009 | std::vector<BufferImageCopy> copies(num_levels); | 1021 | boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); |
| 1010 | for (s32 level = 0; level < num_levels; ++level) { | 1022 | for (s32 level = 0; level < num_levels; ++level) { |
| 1011 | const Extent3D level_size = AdjustMipSize(size, level); | 1023 | const Extent3D level_size = AdjustMipSize(size, level); |
| 1012 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); | 1024 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); |
| @@ -1042,10 +1054,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) { | |||
| 1042 | return AdjustMipBlockSize(num_tiles, level_info.block, level); | 1054 | return AdjustMipBlockSize(num_tiles, level_info.block, level); |
| 1043 | } | 1055 | } |
| 1044 | 1056 | ||
| 1045 | std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { | 1057 | boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) { |
| 1046 | const Extent2D tile_size = DefaultBlockSize(info.format); | 1058 | const Extent2D tile_size = DefaultBlockSize(info.format); |
| 1047 | if (info.type == ImageType::Linear) { | 1059 | if (info.type == ImageType::Linear) { |
| 1048 | return std::vector{SwizzleParameters{ | 1060 | return {SwizzleParameters{ |
| 1049 | .num_tiles = AdjustTileSize(info.size, tile_size), | 1061 | .num_tiles = AdjustTileSize(info.size, tile_size), |
| 1050 | .block = {}, | 1062 | .block = {}, |
| 1051 | .buffer_offset = 0, | 1063 | .buffer_offset = 0, |
| @@ -1057,7 +1069,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { | |||
| 1057 | const s32 num_levels = info.resources.levels; | 1069 | const s32 num_levels = info.resources.levels; |
| 1058 | 1070 | ||
| 1059 | u32 guest_offset = 0; | 1071 | u32 guest_offset = 0; |
| 1060 | std::vector<SwizzleParameters> params(num_levels); | 1072 | boost::container::small_vector<SwizzleParameters, 16> params(num_levels); |
| 1061 | for (s32 level = 0; level < num_levels; ++level) { | 1073 | for (s32 level = 0; level < num_levels; ++level) { |
| 1062 | const Extent3D level_size = AdjustMipSize(size, level); | 1074 | const Extent3D level_size = AdjustMipSize(size, level); |
| 1063 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | 1075 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); |
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 84aa6880d..ab45a43c4 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <optional> | 6 | #include <optional> |
| 7 | #include <span> | 7 | #include <span> |
| 8 | #include <boost/container/small_vector.hpp> | ||
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | #include "common/scratch_buffer.h" | 11 | #include "common/scratch_buffer.h" |
| @@ -40,9 +41,10 @@ struct OverlapResult { | |||
| 40 | 41 | ||
| 41 | [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; | 42 | [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; |
| 42 | 43 | ||
| 43 | [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); | 44 | [[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info); |
| 44 | 45 | ||
| 45 | [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); | 46 | [[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( |
| 47 | const ImageInfo& info); | ||
| 46 | 48 | ||
| 47 | [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); | 49 | [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); |
| 48 | 50 | ||
| @@ -51,21 +53,18 @@ struct OverlapResult { | |||
| 51 | 53 | ||
| 52 | [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; | 54 | [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; |
| 53 | 55 | ||
| 54 | [[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, | 56 | [[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies( |
| 55 | const ImageInfo& src, | 57 | const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1, |
| 56 | SubresourceBase base, u32 up_scale = 1, | 58 | u32 down_shift = 0); |
| 57 | u32 down_shift = 0); | ||
| 58 | 59 | ||
| 59 | [[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, | 60 | [[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies( |
| 60 | u32 up_scale = 1, | 61 | const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0); |
| 61 | u32 down_shift = 0); | ||
| 62 | 62 | ||
| 63 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | 63 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); |
| 64 | 64 | ||
| 65 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | 65 | [[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage( |
| 66 | GPUVAddr gpu_addr, const ImageInfo& info, | 66 | Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 67 | std::span<const u8> input, | 67 | std::span<const u8> input, std::span<u8> output); |
| 68 | std::span<u8> output); | ||
| 69 | 68 | ||
| 70 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 69 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, |
| 71 | const ImageBase& image, std::span<u8> output); | 70 | const ImageBase& image, std::span<u8> output); |
| @@ -73,13 +72,15 @@ struct OverlapResult { | |||
| 73 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | 72 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, |
| 74 | std::span<BufferImageCopy> copies); | 73 | std::span<BufferImageCopy> copies); |
| 75 | 74 | ||
| 76 | [[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); | 75 | [[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies( |
| 76 | const ImageInfo& info); | ||
| 77 | 77 | ||
| 78 | [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); | 78 | [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); |
| 79 | 79 | ||
| 80 | [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); | 80 | [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); |
| 81 | 81 | ||
| 82 | [[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); | 82 | [[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles( |
| 83 | const ImageInfo& info); | ||
| 83 | 84 | ||
| 84 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | 85 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 85 | std::span<const BufferImageCopy> copies, std::span<const u8> memory, | 86 | std::span<const BufferImageCopy> copies, std::span<const u8> memory, |