From 8d6aefdcc452b602d94a84d13bbbc15f806b689c Mon Sep 17 00:00:00 2001
From: Liam
Date: Wed, 14 Jun 2023 14:11:46 -0400
Subject: video_core: optionally skip barriers on feedback loops
---
src/video_core/texture_cache/texture_cache.h | 4 ++++
1 file changed, 4 insertions(+)
(limited to 'src/video_core/texture_cache')
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c7f7448e9..43b7ac0a6 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -186,6 +186,10 @@ void TextureCache
::FillComputeImageViews(std::span views) {
template
void TextureCache::CheckFeedbackLoop(std::span views) {
+ if (!Settings::values.barrier_feedback_loops.GetValue()) {
+ return;
+ }
+
const bool requires_barrier = [&] {
for (const auto& view : views) {
if (!view.id) {
--
cgit v1.2.3
From 76a676883a17523fb12eeac6f2b9702e4916b2c2 Mon Sep 17 00:00:00 2001
From: FengChen
Date: Sat, 17 Jun 2023 23:26:39 +0800
Subject: video_core: add samples check when find render target
---
src/video_core/texture_cache/texture_cache.h | 22 ++++++++++------------
src/video_core/texture_cache/texture_cache_base.h | 10 ++++------
2 files changed, 14 insertions(+), 18 deletions(-)
(limited to 'src/video_core/texture_cache')
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c7f7448e9..f11998e20 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -280,7 +280,7 @@ void TextureCache::SynchronizeComputeDescriptors() {
}
template
-bool TextureCache::RescaleRenderTargets(bool is_clear) {
+bool TextureCache
::RescaleRenderTargets() {
auto& flags = maxwell3d->dirty.flags;
u32 scale_rating = 0;
bool rescaled = false;
@@ -318,13 +318,13 @@ bool TextureCache
::RescaleRenderTargets(bool is_clear) {
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
if (flags[Dirty::ColorBuffer0 + index] || force) {
flags[Dirty::ColorBuffer0 + index] = false;
- BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+ BindRenderTarget(&color_buffer_id, FindColorBuffer(index));
}
check_rescale(color_buffer_id, tmp_color_images[index]);
}
if (flags[Dirty::ZetaBuffer] || force) {
flags[Dirty::ZetaBuffer] = false;
- BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+ BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer());
}
check_rescale(render_targets.depth_buffer_id, tmp_depth_image);
@@ -389,7 +389,7 @@ void TextureCache
::UpdateRenderTargets(bool is_clear) {
return;
}
- const bool rescaled = RescaleRenderTargets(is_clear);
+ const bool rescaled = RescaleRenderTargets();
if (is_rescaling != rescaled) {
flags[Dirty::RescaleViewports] = true;
flags[Dirty::RescaleScissors] = true;
@@ -1658,7 +1658,7 @@ SamplerId TextureCache
::FindSampler(const TSCEntry& config) {
}
template
-ImageViewId TextureCache::FindColorBuffer(size_t index, bool is_clear) {
+ImageViewId TextureCache
::FindColorBuffer(size_t index) {
const auto& regs = maxwell3d->regs;
if (index >= regs.rt_control.count) {
return ImageViewId{};
@@ -1672,11 +1672,11 @@ ImageViewId TextureCache
::FindColorBuffer(size_t index, bool is_clear) {
return ImageViewId{};
}
const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode);
- return FindRenderTargetView(info, gpu_addr, is_clear);
+ return FindRenderTargetView(info, gpu_addr);
}
template
-ImageViewId TextureCache::FindDepthBuffer(bool is_clear) {
+ImageViewId TextureCache
::FindDepthBuffer() {
const auto& regs = maxwell3d->regs;
if (!regs.zeta_enable) {
return ImageViewId{};
@@ -1686,18 +1686,16 @@ ImageViewId TextureCache
::FindDepthBuffer(bool is_clear) {
return ImageViewId{};
}
const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode);
- return FindRenderTargetView(info, gpu_addr, is_clear);
+ return FindRenderTargetView(info, gpu_addr);
}
template
-ImageViewId TextureCache::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
- bool is_clear) {
- const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
+ImageViewId TextureCache
::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr) {
ImageId image_id{};
bool delete_state = has_deleted_images;
do {
has_deleted_images = false;
- image_id = FindOrInsertImage(info, gpu_addr, options);
+ image_id = FindOrInsertImage(info, gpu_addr);
delete_state |= has_deleted_images;
} while (has_deleted_images);
has_deleted_images = delete_state;
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 3bfa92154..c347eccd6 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -166,9 +166,8 @@ public:
void SynchronizeComputeDescriptors();
/// Updates the Render Targets if they can be rescaled
- /// @param is_clear True when the render targets are being used for clears
/// @retval True if the Render Targets have been rescaled.
- bool RescaleRenderTargets(bool is_clear);
+ bool RescaleRenderTargets();
/// Update bound render targets and upload memory if necessary
/// @param is_clear True when the render targets are being used for clears
@@ -324,14 +323,13 @@ private:
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
/// Find or create an image view for the given color buffer index
- [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
+ [[nodiscard]] ImageViewId FindColorBuffer(size_t index);
/// Find or create an image view for the depth buffer
- [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
+ [[nodiscard]] ImageViewId FindDepthBuffer();
/// Find or create a view for a render target with the given image parameters
- [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
- bool is_clear);
+ [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr);
/// Iterates over all the images in a region calling func
template
--
cgit v1.2.3
From 5da70f719703084482933e103e561cc98163f370 Mon Sep 17 00:00:00 2001
From: Kelebek1
Date: Tue, 23 May 2023 14:45:54 +0100
Subject: Remove memory allocations in some hot paths
---
src/video_core/texture_cache/image_base.h | 5 ++-
src/video_core/texture_cache/texture_cache.h | 14 +++----
src/video_core/texture_cache/texture_cache_base.h | 4 +-
src/video_core/texture_cache/util.cpp | 48 +++++++++++++----------
src/video_core/texture_cache/util.h | 31 ++++++++-------
5 files changed, 55 insertions(+), 47 deletions(-)
(limited to 'src/video_core/texture_cache')
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1b8a17ee8..55d49d017 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -6,6 +6,7 @@
#include
#include
#include
+#include
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -108,8 +109,8 @@ struct ImageBase {
std::vector image_view_infos;
std::vector image_view_ids;
- std::vector slice_offsets;
- std::vector slice_subresources;
+ boost::container::small_vector slice_offsets;
+ boost::container::small_vector slice_subresources;
std::vector aliased_images;
std::vector overlapping_images;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d58bb69ff..d3f03a995 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -526,7 +526,7 @@ void TextureCache::WriteMemory(VAddr cpu_addr, size_t size) {
template
void TextureCache::DownloadMemory(VAddr cpu_addr, size_t size) {
- std::vector images;
+ boost::container::small_vector images;
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
if (!image.IsSafeDownload()) {
return;
@@ -579,7 +579,7 @@ std::optional TextureCache::GetFlushArea(V
template
void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
- std::vector deleted_images;
+ boost::container::small_vector deleted_images;
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
@@ -593,7 +593,7 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
template
void TextureCache::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
- std::vector deleted_images;
+ boost::container::small_vector deleted_images;
ForEachImageInRegionGPU(as_id, gpu_addr, size,
[&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
@@ -1101,7 +1101,7 @@ ImageId TextureCache::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
const bool native_bgr = runtime.HasNativeBgr();
const bool flexible_formats = True(options & RelaxedOptions::Format);
ImageId image_id{};
- boost::container::small_vector image_ids;
+ boost::container::small_vector image_ids;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
return false;
@@ -1622,7 +1622,7 @@ ImageId TextureCache::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr)
}
}
ImageId image_id{};
- boost::container::small_vector image_ids;
+ boost::container::small_vector image_ids;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
return false;
@@ -1942,7 +1942,7 @@ void TextureCache::RegisterImage(ImageId image_id) {
image.map_view_id = map_id;
return;
}
- std::vector sparse_maps{};
+ boost::container::small_vector sparse_maps;
ForEachSparseSegment(
image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
@@ -2217,7 +2217,7 @@ void TextureCache::MarkModification(ImageBase& image) noexcept {
template
void TextureCache::SynchronizeAliases(ImageId image_id) {
- boost::container::small_vector aliased_images;
+ boost::container::small_vector aliased_images;
Image& image = slot_images[image_id];
bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 44232b961..e9ec91265 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -56,7 +56,7 @@ struct ImageViewInOut {
struct AsyncDecodeContext {
ImageId image_id;
Common::ScratchBuffer decoded_data;
- std::vector copies;
+ boost::container::small_vector copies;
std::mutex mutex;
std::atomic_bool complete;
};
@@ -429,7 +429,7 @@ private:
std::unordered_map, Common::IdentityHash> page_table;
std::unordered_map, Common::IdentityHash> sparse_page_table;
- std::unordered_map> sparse_views;
+ std::unordered_map> sparse_views;
VAddr virtual_invalid_space{};
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 95a5b47d8..f781cb7a0 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -329,13 +329,13 @@ template
[[nodiscard]] std::optional ResolveOverlapRightAddress3D(
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
- const std::vector slice_offsets = CalculateSliceOffsets(new_info);
+ const auto slice_offsets = CalculateSliceOffsets(new_info);
const u32 diff = static_cast(overlap.gpu_addr - gpu_addr);
const auto it = std::ranges::find(slice_offsets, diff);
if (it == slice_offsets.end()) {
return std::nullopt;
}
- const std::vector subresources = CalculateSliceSubresources(new_info);
+ const auto subresources = CalculateSliceSubresources(new_info);
const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
const ImageInfo& info = overlap.info;
if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
@@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
return sizes;
}
-std::vector CalculateSliceOffsets(const ImageInfo& info) {
+boost::container::small_vector CalculateSliceOffsets(const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D);
- std::vector offsets;
+ boost::container::small_vector offsets;
offsets.reserve(NumSlices(info));
const LevelInfo level_info = MakeLevelInfo(info);
@@ -679,9 +679,10 @@ std::vector CalculateSliceOffsets(const ImageInfo& info) {
return offsets;
}
-std::vector CalculateSliceSubresources(const ImageInfo& info) {
+boost::container::small_vector CalculateSliceSubresources(
+ const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D);
- std::vector subresources;
+ boost::container::small_vector subresources;
subresources.reserve(NumSlices(info));
for (s32 level = 0; level < info.resources.levels; ++level) {
const s32 depth = AdjustMipSize(info.size.depth, level);
@@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
}
}
-std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
- SubresourceBase base, u32 up_scale, u32 down_shift) {
+boost::container::small_vector MakeShrinkImageCopies(const ImageInfo& dst,
+ const ImageInfo& src,
+ SubresourceBase base,
+ u32 up_scale, u32 down_shift) {
ASSERT(dst.resources.levels >= src.resources.levels);
const bool is_dst_3d = dst.type == ImageType::e3D;
@@ -733,7 +736,7 @@ std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
ASSERT(src.resources.levels == 1);
}
const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
- std::vector copies;
+ boost::container::small_vector copies;
copies.reserve(src.resources.levels);
for (s32 level = 0; level < src.resources.levels; ++level) {
ImageCopy& copy = copies.emplace_back();
@@ -770,9 +773,10 @@ std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
return copies;
}
-std::vector MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale,
- u32 down_shift) {
- std::vector copies;
+boost::container::small_vector MakeReinterpretImageCopies(const ImageInfo& src,
+ u32 up_scale,
+ u32 down_shift) {
+ boost::container::small_vector copies;
copies.reserve(src.resources.levels);
const bool is_3d = src.type == ImageType::e3D;
for (s32 level = 0; level < src.resources.levels; ++level) {
@@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
}
-std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
- const ImageInfo& info, std::span input,
- std::span output) {
+boost::container::small_vector UnswizzleImage(Tegra::MemoryManager& gpu_memory,
+ GPUVAddr gpu_addr,
+ const ImageInfo& info,
+ std::span input,
+ std::span output) {
const size_t guest_size_bytes = input.size_bytes();
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
const Extent3D size = info.size;
@@ -861,7 +867,7 @@ std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
info.tile_width_spacing);
size_t guest_offset = 0;
u32 host_offset = 0;
- std::vector copies(num_levels);
+ boost::container::small_vector copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
@@ -978,7 +984,7 @@ void ConvertImage(std::span input, const ImageInfo& info, std::span FullDownloadCopies(const ImageInfo& info) {
+boost::container::small_vector FullDownloadCopies(const ImageInfo& info) {
const Extent3D size = info.size;
const u32 bytes_per_block = BytesPerBlock(info.format);
if (info.type == ImageType::Linear) {
@@ -1006,7 +1012,7 @@ std::vector FullDownloadCopies(const ImageInfo& info) {
u32 host_offset = 0;
- std::vector copies(num_levels);
+ boost::container::small_vector copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
@@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
return AdjustMipBlockSize(num_tiles, level_info.block, level);
}
-std::vector FullUploadSwizzles(const ImageInfo& info) {
+boost::container::small_vector FullUploadSwizzles(const ImageInfo& info) {
const Extent2D tile_size = DefaultBlockSize(info.format);
if (info.type == ImageType::Linear) {
- return std::vector{SwizzleParameters{
+ return {SwizzleParameters{
.num_tiles = AdjustTileSize(info.size, tile_size),
.block = {},
.buffer_offset = 0,
@@ -1057,7 +1063,7 @@ std::vector FullUploadSwizzles(const ImageInfo& info) {
const s32 num_levels = info.resources.levels;
u32 guest_offset = 0;
- std::vector params(num_levels);
+ boost::container::small_vector params(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 84aa6880d..ab45a43c4 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -5,6 +5,7 @@
#include
#include
+#include
#include "common/common_types.h"
#include "common/scratch_buffer.h"
@@ -40,9 +41,10 @@ struct OverlapResult {
[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
-[[nodiscard]] std::vector CalculateSliceOffsets(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector CalculateSliceOffsets(const ImageInfo& info);
-[[nodiscard]] std::vector CalculateSliceSubresources(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector CalculateSliceSubresources(
+ const ImageInfo& info);
[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
@@ -51,21 +53,18 @@ struct OverlapResult {
[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
-[[nodiscard]] std::vector MakeShrinkImageCopies(const ImageInfo& dst,
- const ImageInfo& src,
- SubresourceBase base, u32 up_scale = 1,
- u32 down_shift = 0);
+[[nodiscard]] boost::container::small_vector MakeShrinkImageCopies(
+ const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1,
+ u32 down_shift = 0);
-[[nodiscard]] std::vector MakeReinterpretImageCopies(const ImageInfo& src,
- u32 up_scale = 1,
- u32 down_shift = 0);
+[[nodiscard]] boost::container::small_vector MakeReinterpretImageCopies(
+ const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0);
[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
-[[nodiscard]] std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory,
- GPUVAddr gpu_addr, const ImageInfo& info,
- std::span input,
- std::span output);
+[[nodiscard]] boost::container::small_vector UnswizzleImage(
+ Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
+ std::span input, std::span output);
[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
const ImageBase& image, std::span output);
@@ -73,13 +72,15 @@ struct OverlapResult {
void ConvertImage(std::span input, const ImageInfo& info, std::span output,
std::span copies);
-[[nodiscard]] std::vector FullDownloadCopies(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector FullDownloadCopies(
+ const ImageInfo& info);
[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
-[[nodiscard]] std::vector FullUploadSwizzles(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector FullUploadSwizzles(
+ const ImageInfo& info);
void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
std::span copies, std::span memory,
--
cgit v1.2.3
From eac46ad7ceca5e35b396a8b80bfc38dc6ef1a4fe Mon Sep 17 00:00:00 2001
From: GPUCode
Date: Tue, 6 Jun 2023 23:10:06 +0300
Subject: video_core: Add BCn decoding support
---
src/video_core/texture_cache/decode_bc.cpp | 129 ++++++++++++++++++++++++++++
src/video_core/texture_cache/decode_bc.h | 19 ++++
src/video_core/texture_cache/decode_bc4.cpp | 96 ---------------------
src/video_core/texture_cache/decode_bc4.h | 15 ----
src/video_core/texture_cache/util.cpp | 24 ++++--
5 files changed, 163 insertions(+), 120 deletions(-)
create mode 100644 src/video_core/texture_cache/decode_bc.cpp
create mode 100644 src/video_core/texture_cache/decode_bc.h
delete mode 100644 src/video_core/texture_cache/decode_bc4.cpp
delete mode 100644 src/video_core/texture_cache/decode_bc4.h
(limited to 'src/video_core/texture_cache')
diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp
new file mode 100644
index 000000000..3e26474a3
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc.cpp
@@ -0,0 +1,129 @@
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include
+#include
+#include
+#include
+
+#include "common/common_types.h"
+#include "video_core/texture_cache/decode_bc.h"
+
+namespace VideoCommon {
+
+namespace {
+constexpr u32 BLOCK_SIZE = 4;
+
+using VideoCore::Surface::PixelFormat;
+
+constexpr bool IsSigned(PixelFormat pixel_format) { // True when DecompressBlocks must pass the extra is_signed argument to the decoder for this format.
+ switch (pixel_format) {
+ case PixelFormat::BC4_SNORM:
+ case PixelFormat::BC4_UNORM: // Unsigned variants are listed as well: this picks the call shape, it does not report the format's signedness.
+ case PixelFormat::BC5_SNORM:
+ case PixelFormat::BC5_UNORM:
+ case PixelFormat::BC6H_SFLOAT:
+ case PixelFormat::BC6H_UFLOAT:
+ return true;
+ default:
+ return false; // All other formats are decoded through the six-argument call without is_signed.
+ }
+}
+
+constexpr u32 BlockSize(PixelFormat pixel_format) { // Number of input bytes DecompressBlocks advances by after each decoded block.
+ switch (pixel_format) {
+ case PixelFormat::BC1_RGBA_SRGB:
+ case PixelFormat::BC1_RGBA_UNORM:
+ case PixelFormat::BC4_SNORM:
+ case PixelFormat::BC4_UNORM:
+ return 8; // BC1 and BC4 blocks.
+ default:
+ return 16; // Every other format, including values that are not BCn formats at all.
+ }
+}
+} // Anonymous namespace
+
+u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) { // Size in bytes of one decoded output element; DecompressBlocks multiplies it by the width in texels to get the row pitch.
+ switch (pixel_format) {
+ case PixelFormat::BC4_SNORM:
+ case PixelFormat::BC4_UNORM:
+ return 1; // NOTE(review): presumably one 8-bit channel - confirm against the decoder's output layout.
+ case PixelFormat::BC5_SNORM:
+ case PixelFormat::BC5_UNORM:
+ return 2; // NOTE(review): presumably two 8-bit channels - confirm.
+ case PixelFormat::BC6H_SFLOAT:
+ case PixelFormat::BC6H_UFLOAT:
+ return 8; // NOTE(review): presumably four 16-bit channels - confirm.
+ default:
+ return 4; // Same value for all remaining formats; no validation that the format is actually BCn.
+ }
+}
+
+template // NOTE(review): the template parameter list is missing in this copy; the body relies on `decompress` and `pixel_format`, which are not function parameters.
+void DecompressBlocks(std::span input, std::span output, Extent3D extent, // Decodes `input` block by block into `output`, one decoder call per block.
+ bool is_signed = false) { // is_signed is forwarded only when IsSigned(pixel_format) is true.
+ const u32 out_bpp = ConvertedBytesPerBlock(pixel_format); // Output bytes per texel.
+ const u32 block_width = std::min(extent.width, BLOCK_SIZE); // Horizontal step: BLOCK_SIZE, or the full width when the image is narrower than one block.
+ const u32 block_height = std::min(extent.height, BLOCK_SIZE); // Vertical step, clamped the same way.
+ const u32 pitch = extent.width * out_bpp; // Bytes per output row.
+ size_t input_offset = 0; // Read cursor in bytes; blocks are consumed strictly in sequence.
+ size_t output_offset = 0; // Byte offset of the current block row; never reset, so slices are written back to back.
+ for (u32 slice = 0; slice < extent.depth; ++slice) {
+ for (u32 y = 0; y < extent.height; y += block_height) {
+ size_t row_offset = 0; // Byte offset of the current block within its row of blocks.
+ for (u32 x = 0; x < extent.width;
+ x += block_width, row_offset += block_width * out_bpp) {
+ const u8* src = input.data() + input_offset;
+ u8* const dst = output.data() + output_offset + row_offset; // Top-left output byte of this block.
+ if constexpr (IsSigned(pixel_format)) { // Resolved at compile time, so only the matching call shape is instantiated.
+ decompress(src, dst, x, y, extent.width, extent.height, is_signed);
+ } else {
+ decompress(src, dst, x, y, extent.width, extent.height);
+ }
+ input_offset += BlockSize(pixel_format); // Move to the next compressed block.
+ }
+ output_offset += block_height * pitch; // Skip the rows covered by the block row just written.
+ }
+ }
+}
+
+void DecompressBCn(std::span input, std::span output, Extent3D extent, // Picks the DecompressBlocks call for `pixel_format` and runs it over the whole extent.
+ VideoCore::Surface::PixelFormat pixel_format) {
+ switch (pixel_format) { // NOTE(review): the explicit template arguments of every DecompressBlocks call below are missing in this copy, so the decoder chosen per case is not visible here.
+ case PixelFormat::BC1_RGBA_UNORM:
+ case PixelFormat::BC1_RGBA_SRGB:
+ DecompressBlocks(input, output, extent);
+ break;
+ case PixelFormat::BC2_UNORM:
+ case PixelFormat::BC2_SRGB:
+ DecompressBlocks(input, output, extent);
+ break;
+ case PixelFormat::BC3_UNORM:
+ case PixelFormat::BC3_SRGB:
+ DecompressBlocks(input, output, extent);
+ break;
+ case PixelFormat::BC4_SNORM:
+ case PixelFormat::BC4_UNORM:
+ DecompressBlocks(
+ input, output, extent, pixel_format == PixelFormat::BC4_SNORM); // is_signed is true only for the SNORM variant.
+ break;
+ case PixelFormat::BC5_SNORM:
+ case PixelFormat::BC5_UNORM:
+ DecompressBlocks(
+ input, output, extent, pixel_format == PixelFormat::BC5_SNORM); // is_signed is true only for the SNORM variant.
+ break;
+ case PixelFormat::BC6H_SFLOAT:
+ case PixelFormat::BC6H_UFLOAT:
+ DecompressBlocks(
+ input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT); // is_signed is true only for the SFLOAT variant.
+ break;
+ case PixelFormat::BC7_SRGB:
+ case PixelFormat::BC7_UNORM:
+ DecompressBlocks(input, output, extent);
+ break;
+ default:
+ LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format); // Unlisted formats are only logged; `output` is not written.
+ }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc.h b/src/video_core/texture_cache/decode_bc.h
new file mode 100644
index 000000000..41d1ec0a3
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc.h
@@ -0,0 +1,19 @@
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include
+
+#include "common/common_types.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+[[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format); // Bytes per decoded output element: 1 for BC4, 2 for BC5, 8 for BC6H, 4 for anything else.
+
+void DecompressBCn(std::span input, std::span output, Extent3D extent, // Decodes BC1-BC7 data from `input` into `output`; other formats log a warning and write nothing.
+ VideoCore::Surface::PixelFormat pixel_format); // Selects the decoder and, for BC4/BC5/BC6H, whether it runs in signed mode.
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp
deleted file mode 100644
index ef98afdca..000000000
--- a/src/video_core/texture_cache/decode_bc4.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include
-#include
-#include
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/texture_cache/decode_bc4.h"
-#include "video_core/texture_cache/types.h"
-
-namespace VideoCommon {
-
-// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
-[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
- const u32 code_offset = 16 + 3 * (4 * y + x);
- const u32 code = (bits >> code_offset) & 7;
- const u32 red0 = (bits >> 0) & 0xff;
- const u32 red1 = (bits >> 8) & 0xff;
- if (red0 > red1) {
- switch (code) {
- case 0:
- return red0;
- case 1:
- return red1;
- case 2:
- return (6 * red0 + 1 * red1) / 7;
- case 3:
- return (5 * red0 + 2 * red1) / 7;
- case 4:
- return (4 * red0 + 3 * red1) / 7;
- case 5:
- return (3 * red0 + 4 * red1) / 7;
- case 6:
- return (2 * red0 + 5 * red1) / 7;
- case 7:
- return (1 * red0 + 6 * red1) / 7;
- }
- } else {
- switch (code) {
- case 0:
- return red0;
- case 1:
- return red1;
- case 2:
- return (4 * red0 + 1 * red1) / 5;
- case 3:
- return (3 * red0 + 2 * red1) / 5;
- case 4:
- return (2 * red0 + 3 * red1) / 5;
- case 5:
- return (1 * red0 + 4 * red1) / 5;
- case 6:
- return 0;
- case 7:
- return 0xff;
- }
- }
- return 0;
-}
-
-void DecompressBC4(std::span input, Extent3D extent, std::span output) {
- UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
- UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
- static constexpr u32 BLOCK_SIZE = 4;
- size_t input_offset = 0;
- for (u32 slice = 0; slice < extent.depth; ++slice) {
- for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) {
- for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) {
- u64 bits;
- std::memcpy(&bits, &input[input_offset], sizeof(bits));
- input_offset += sizeof(bits);
-
- for (u32 y = 0; y < BLOCK_SIZE; ++y) {
- for (u32 x = 0; x < BLOCK_SIZE; ++x) {
- const u32 linear_z = slice;
- const u32 linear_y = block_y * BLOCK_SIZE + y;
- const u32 linear_x = block_x * BLOCK_SIZE + x;
- const u32 offset_z = linear_z * extent.width * extent.height;
- const u32 offset_y = linear_y * extent.width;
- const u32 offset_x = linear_x;
- const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL;
- const u32 color = DecompressBlock(bits, x, y);
- output[output_offset + 0] = static_cast(color);
- output[output_offset + 1] = 0;
- output[output_offset + 2] = 0;
- output[output_offset + 3] = 0xff;
- }
- }
- }
- }
- }
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h
deleted file mode 100644
index ab2f735be..000000000
--- a/src/video_core/texture_cache/decode_bc4.h
+++ /dev/null
@@ -1,15 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include
-
-#include "common/common_types.h"
-#include "video_core/texture_cache/types.h"
-
-namespace VideoCommon {
-
-void DecompressBC4(std::span data, Extent3D extent, std::span output);
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index f781cb7a0..9a618a57a 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -24,7 +24,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/surface.h"
-#include "video_core/texture_cache/decode_bc4.h"
+#include "video_core/texture_cache/decode_bc.h"
#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/samples_helper.h"
@@ -61,8 +61,6 @@ using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::SurfaceType;
-constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
-
struct LevelInfo {
Extent3D size;
Extent3D block;
@@ -612,7 +610,8 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
}
return output_size;
}
- return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
+ return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers *
+ ConvertedBytesPerBlock(info.format);
}
u32 CalculateLayerStride(const ImageInfo& info) noexcept {
@@ -945,7 +944,8 @@ void ConvertImage(std::span input, const ImageInfo& info, std::span input, const ImageInfo& info, std::span input, const ImageInfo& info, std::span(copy.buffer_size);
} else {
- DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset));
-
+ const Extent3D image_extent{
+ .width = copy.image_extent.width,
+ .height = copy.image_extent.height * copy.image_subresource.num_layers,
+ .depth = copy.image_extent.depth,
+ };
+ DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format);
output_offset += copy.image_extent.width * copy.image_extent.height *
- copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
+ copy.image_subresource.num_layers *
+ ConvertedBytesPerBlock(info.format);
}
}
}
--
cgit v1.2.3
From b62121fd605663dc9aaaae72fe8e444312f9c5d5 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Thu, 29 Jun 2023 11:58:45 +0200
Subject: Texture cache: Fix YFC regression due to code testing
---
src/video_core/texture_cache/texture_cache.h | 9 ---------
1 file changed, 9 deletions(-)
(limited to 'src/video_core/texture_cache')
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d3f03a995..485f6b6f3 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -598,14 +598,6 @@ void TextureCache::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
[&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
- if (True(image.flags & ImageFlagBits::CpuModified)) {
- return;
- }
- image.flags |= ImageFlagBits::CpuModified;
- if (True(image.flags & ImageFlagBits::Tracked)) {
- UntrackImage(image, id);
- }
- /*
if (True(image.flags & ImageFlagBits::Remapped)) {
continue;
}
@@ -613,7 +605,6 @@ void TextureCache
::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, id);
}
- */
}
}
--
cgit v1.2.3
From 596a6132b974dd73935854d8f51842424e058be8 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Thu, 29 Jun 2023 17:23:29 +0200
Subject: AccelerateDMA: Don't accelerate 3D texture DMA operations
---
src/video_core/texture_cache/texture_cache.h | 4 ++++
1 file changed, 4 insertions(+)
(limited to 'src/video_core/texture_cache')
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d3f03a995..0330415b7 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -879,6 +879,10 @@ ImageId TextureCache
::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo
return NULL_IMAGE_ID;
}
auto& image = slot_images[image_id];
+ if (image.info.type == ImageType::e3D) {
+ // Don't accelerate 3D images.
+ return NULL_IMAGE_ID;
+ }
if (!is_upload && !image.info.dma_downloaded) {
// Force a full sync.
image.info.dma_downloaded = true;
--
cgit v1.2.3