From 779f4ac72d2ea2788c2106c8d2d1ec0e01b77b81 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 21 Nov 2021 05:32:34 +0100 Subject: TextureCache: Eliminate format deduction as full depth conversion has been supported. --- src/video_core/texture_cache/texture_cache.h | 6 ++---- src/video_core/texture_cache/util.cpp | 28 +++------------------------- 2 files changed, 5 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 44a0d42ba..0e4907c53 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1079,7 +1079,7 @@ ImageId TextureCache
::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
template ::BlitImages TextureCache ::GetBlitImages(
const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
- static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
+ static constexpr auto FIND_OPTIONS = RelaxedOptions::Samples;
const GPUVAddr dst_addr = dst.Address();
const GPUVAddr src_addr = src.Address();
ImageInfo dst_info(dst);
@@ -1093,9 +1093,7 @@ typename TextureCache ::BlitImages TextureCache ::GetBlitImages(
const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
DeduceBlitImages(dst_info, src_info, dst_image, src_image);
- if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
- continue;
- }
+ ASSERT(GetFormatType(dst_info.format) == GetFormatType(src_info.format));
RelaxedOptions find_options{};
if (src_info.num_samples > 1) {
// it's a resolve, we must enforce the same format.
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index e4d82631e..777503488 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -1152,36 +1152,14 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr
void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
const ImageBase* src) {
bool is_resolve = false;
- const auto original_src_format = src_info.format;
- const auto original_dst_format = dst_info.format;
if (src) {
- if (GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
- src_info.format = src->info.format;
- }
is_resolve = src->info.num_samples > 1;
src_info.num_samples = src->info.num_samples;
src_info.size = src->info.size;
}
- if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
- dst_info.format = dst->info.format;
- }
- if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
- if (dst) {
- if (GetFormatType(dst->info.format) == SurfaceType::ColorTexture) {
- src_info.format = original_src_format;
- }
- } else {
- dst_info.format = src->info.format;
- }
- }
- if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
- if (src) {
- if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) {
- dst_info.format = original_dst_format;
- }
- } else {
- src_info.format = dst->info.format;
- }
+ if (dst) {
+ dst_info.num_samples = dst->info.num_samples;
+ dst_info.size = dst->info.size;
}
ASSERT(!is_resolve || dst_info.format == src_info.format);
}
--
cgit v1.2.3
From b96caf200d047b81554c3839c7a6a7c35b251944 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Sun, 21 Nov 2021 20:52:39 +0100
Subject: HostShaders: Fix D24S8 conversion shaders.
---
.../host_shaders/convert_abgr8_to_d24s8.frag | 7 ++++---
.../host_shaders/convert_b10g11r11_to_d24s8.frag | 18 +++++++++++++-----
.../host_shaders/convert_d24s8_to_abgr8.frag | 10 ++++++----
.../host_shaders/convert_d24s8_to_b10g11r11.frag | 19 +++++++++++++++----
.../host_shaders/convert_d24s8_to_r16g16.frag | 7 ++++---
.../host_shaders/convert_r16g16_to_d24s8.frag | 9 +++++----
6 files changed, 47 insertions(+), 23 deletions(-)
(limited to 'src')
diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
index 4e4ab6a26..d51397a0c 100644
--- a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
+++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
@@ -10,8 +10,9 @@ layout(binding = 0) uniform sampler2D color_texture;
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
uvec4 color = uvec4(texelFetch(color_texture, coord, 0).rgba * (exp2(8) - 1.0f));
- uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b;
+ uvec4 bytes = color << uvec4(24, 16, 8, 0);
+ uint depth_stencil_unorm = bytes.x | bytes.y | bytes.z | bytes.w;
- gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f);
- gl_FragStencilRefARB = int(color.a);
+ gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f);
+ gl_FragStencilRefARB = int(depth_stencil_unorm >> 24);
}
diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag
index 2999a84cf..11bdd861d 100644
--- a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag
+++ b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag
@@ -7,13 +7,21 @@
layout(binding = 0) uniform sampler2D color_texture;
+uint conv_from_float(float value_f, uint mantissa_bits) {
+ uint value = floatBitsToInt(value_f);
+ uint exp = (value >> 23) & 0x1Fu;
+ uint mantissa_shift = 32u - mantissa_bits;
+ uint mantissa = (value << 9u) >> mantissa_shift;
+ return (exp << mantissa_bits) | mantissa;
+}
+
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
vec4 color = texelFetch(color_texture, coord, 0).rgba;
- uint depth_stencil_unorm = (uint(color.b * (exp2(10) - 1.0f)) << 22)
- | (uint(color.g * (exp2(11) - 1.0f)) << 11)
- | (uint(color.r * (exp2(11) - 1.0f)));
+ uint depth_stencil_unorm = (conv_from_float(color.r, 6u) << 21)
+ | (conv_from_float(color.g, 6u) << 10)
+ | conv_from_float(color.b, 5u);
- gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f);
- gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF);
+ gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f);
+ gl_FragStencilRefARB = int(depth_stencil_unorm >> 24);
}
diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
index ff3bf8209..47f9c1abc 100644
--- a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
+++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
@@ -14,8 +14,10 @@ void main() {
uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
- color.r = float(depth >> 16) / (exp2(8) - 1.0);
- color.g = float((depth >> 8) & 0x00FF) / (exp2(8) - 1.0);
- color.b = float(depth & 0x00FF) / (exp2(8) - 1.0);
- color.a = float(stencil) / (exp2(8) - 1.0);
+ highp uint depth_val =
+ uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
+ lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
+ highp uvec4 components =
+ uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
+ color = vec4(components) / (exp2(8.0) - 1.0);
}
diff --git a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag
index c743d3a13..c2d935fcd 100644
--- a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag
+++ b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag
@@ -9,13 +9,24 @@ layout(binding = 1) uniform isampler2D stencil_tex;
layout(location = 0) out vec4 color;
+float conv_to_float(uint value, uint mantissa_bits) {
+ uint exp = (value >> mantissa_bits) & 0x1Fu;
+ uint mantissa_shift = 32u - mantissa_bits;
+ uint mantissa = (value << mantissa_shift) >> mantissa_shift;
+ return uintBitsToFloat((exp << 23) | (mantissa << (23 - mantissa_bits)));
+}
+
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
- uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
+ uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0f));
uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
+ uint depth_stencil = (stencil << 24) | (depth >> 8);
+ uint red_int = (depth_stencil >> 21) & 0x07FF;
+ uint green_int = (depth_stencil >> 10) & 0x07FF;
+ uint blue_int = depth_stencil & 0x03FF;
- color.b = float(depth >> 22) / (exp2(10) - 1.0);
- color.g = float((depth >> 11) & 0x00FF) / (exp2(11) - 1.0);
- color.r = float(depth & 0x00FF) / (exp2(11) - 1.0);
+ color.r = conv_to_float(red_int, 6u);
+ color.g = conv_to_float(green_int, 6u);
+ color.b = conv_to_float(blue_int, 5u);
color.a = 1.0f;
}
diff --git a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag
index 2a9443d3d..c48a7ac66 100644
--- a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag
+++ b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag
@@ -11,11 +11,12 @@ layout(location = 0) out vec4 color;
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
- uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
+ uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0f));
uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
+ uint depth_stencil = (stencil << 24) | (depth >> 8);
- color.r = float(depth >> 16) / (exp2(16) - 1.0);
- color.g = float((depth >> 16) & 0x00FF) / (exp2(16) - 1.0);
+ color.r = float(depth_stencil & 0x0000FFFFu) / (exp2(16) - 1.0);
+ color.g = float(depth_stencil >> 16) / (exp2(16) - 1.0);
color.b = 0.0f;
color.a = 1.0f;
}
diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag
index 3df70575e..beb2d1284 100644
--- a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag
+++ b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag
@@ -10,9 +10,10 @@ layout(binding = 0) uniform sampler2D color_texture;
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
vec4 color = texelFetch(color_texture, coord, 0).rgba;
- uint depth_stencil_unorm = (uint(color.r * (exp2(16) - 1.0f)) << 16)
- | (uint(color.g * (exp2(16) - 1.0f)) << 16);
+ uvec2 bytes = uvec2(color.rg * (exp2(16) - 1.0f)) << uvec2(0, 16);
+ uint depth_stencil_unorm =
+ uint(color.r * (exp2(16) - 1.0f)) | (uint(color.g * (exp2(16) - 1.0f)) << 16);
- gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f);
- gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF);
+ gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f);
+ gl_FragStencilRefARB = int(depth_stencil_unorm >> 24);
}
--
cgit v1.2.3
From d7f4434bd534d53e8aea293e39629bf8ca8ee123 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Sun, 21 Nov 2021 21:09:49 +0100
Subject: VulkanTextureCache: Use reinterpret on D32_S8 formats.
---
src/video_core/renderer_vulkan/vk_texture_cache.cpp | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
(limited to 'src')
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 3964424af..e1ba1bdaf 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -775,8 +775,13 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
if (VideoCore::Surface::GetFormatType(dst.info.format) ==
- VideoCore::Surface::SurfaceType::DepthStencil) {
- return !device.IsExtShaderStencilExportSupported();
+ VideoCore::Surface::SurfaceType::DepthStencil &&
+ !device.IsExtShaderStencilExportSupported()) {
+ return true;
+ }
+ if (dst.info.format == PixelFormat::D32_FLOAT_S8_UINT ||
+ src.info.format == PixelFormat::D32_FLOAT_S8_UINT) {
+ return true;
}
return false;
}
--
cgit v1.2.3
From 853284943901560081f6ff992b6c04b7c33f0d21 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Mon, 22 Nov 2021 00:00:01 +0100
Subject: TextureCache: Simplify blitting of D24S8 formats and fix bugs.
---
src/video_core/host_shaders/CMakeLists.txt | 4 -
.../host_shaders/convert_b10g11r11_to_d24s8.frag | 27 ------
.../host_shaders/convert_d24s8_to_b10g11r11.frag | 32 -------
.../host_shaders/convert_d24s8_to_r16g16.frag | 22 -----
.../host_shaders/convert_r16g16_to_d24s8.frag | 19 -----
src/video_core/renderer_vulkan/blit_image.cpp | 98 +++++++++++-----------
src/video_core/renderer_vulkan/blit_image.h | 25 +-----
.../renderer_vulkan/vk_texture_cache.cpp | 30 ++-----
src/video_core/renderer_vulkan/vk_texture_cache.h | 3 +
src/video_core/texture_cache/texture_cache.h | 8 +-
10 files changed, 73 insertions(+), 195 deletions(-)
delete mode 100644 src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag
delete mode 100644 src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag
delete mode 100644 src/video_core/host_shaders/convert_d24s8_to_r16g16.frag
delete mode 100644 src/video_core/host_shaders/convert_r16g16_to_d24s8.frag
(limited to 'src')
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 1c91999d7..fd3e41434 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -11,13 +11,9 @@ set(SHADER_FILES
block_linear_unswizzle_2d.comp
block_linear_unswizzle_3d.comp
convert_abgr8_to_d24s8.frag
- convert_b10g11r11_to_d24s8.frag
convert_d24s8_to_abgr8.frag
- convert_d24s8_to_b10g11r11.frag
- convert_d24s8_to_r16g16.frag
convert_depth_to_float.frag
convert_float_to_depth.frag
- convert_r16g16_to_d24s8.frag
full_screen_triangle.vert
fxaa.frag
fxaa.vert
diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag
deleted file mode 100644
index 11bdd861d..000000000
--- a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#version 450
-#extension GL_ARB_shader_stencil_export : require
-
-layout(binding = 0) uniform sampler2D color_texture;
-
-uint conv_from_float(float value_f, uint mantissa_bits) {
- uint value = floatBitsToInt(value_f);
- uint exp = (value >> 23) & 0x1Fu;
- uint mantissa_shift = 32u - mantissa_bits;
- uint mantissa = (value << 9u) >> mantissa_shift;
- return (exp << mantissa_bits) | mantissa;
-}
-
-void main() {
- ivec2 coord = ivec2(gl_FragCoord.xy);
- vec4 color = texelFetch(color_texture, coord, 0).rgba;
- uint depth_stencil_unorm = (conv_from_float(color.r, 6u) << 21)
- | (conv_from_float(color.g, 6u) << 10)
- | conv_from_float(color.b, 5u);
-
- gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f);
- gl_FragStencilRefARB = int(depth_stencil_unorm >> 24);
-}
diff --git a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag
deleted file mode 100644
index c2d935fcd..000000000
--- a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#version 450
-
-layout(binding = 0) uniform sampler2D depth_tex;
-layout(binding = 1) uniform isampler2D stencil_tex;
-
-layout(location = 0) out vec4 color;
-
-float conv_to_float(uint value, uint mantissa_bits) {
- uint exp = (value >> mantissa_bits) & 0x1Fu;
- uint mantissa_shift = 32u - mantissa_bits;
- uint mantissa = (value << mantissa_shift) >> mantissa_shift;
- return uintBitsToFloat((exp << 23) | (mantissa << (23 - mantissa_bits)));
-}
-
-void main() {
- ivec2 coord = ivec2(gl_FragCoord.xy);
- uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0f));
- uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
- uint depth_stencil = (stencil << 24) | (depth >> 8);
- uint red_int = (depth_stencil >> 21) & 0x07FF;
- uint green_int = (depth_stencil >> 10) & 0x07FF;
- uint blue_int = depth_stencil & 0x03FF;
-
- color.r = conv_to_float(red_int, 6u);
- color.g = conv_to_float(green_int, 6u);
- color.b = conv_to_float(blue_int, 5u);
- color.a = 1.0f;
-}
diff --git a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag
deleted file mode 100644
index c48a7ac66..000000000
--- a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#version 450
-
-layout(binding = 0) uniform sampler2D depth_tex;
-layout(binding = 1) uniform isampler2D stencil_tex;
-
-layout(location = 0) out vec4 color;
-
-void main() {
- ivec2 coord = ivec2(gl_FragCoord.xy);
- uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0f));
- uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
- uint depth_stencil = (stencil << 24) | (depth >> 8);
-
- color.r = float(depth_stencil & 0x0000FFFFu) / (exp2(16) - 1.0);
- color.g = float(depth_stencil >> 16) / (exp2(16) - 1.0);
- color.b = 0.0f;
- color.a = 1.0f;
-}
diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag
deleted file mode 100644
index beb2d1284..000000000
--- a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#version 450
-#extension GL_ARB_shader_stencil_export : require
-
-layout(binding = 0) uniform sampler2D color_texture;
-
-void main() {
- ivec2 coord = ivec2(gl_FragCoord.xy);
- vec4 color = texelFetch(color_texture, coord, 0).rgba;
- uvec2 bytes = uvec2(color.rg * (exp2(16) - 1.0f)) << uvec2(0, 16);
- uint depth_stencil_unorm =
- uint(color.r * (exp2(16) - 1.0f)) | (uint(color.g * (exp2(16) - 1.0f)) << 16);
-
- gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f);
- gl_FragStencilRefARB = int(depth_stencil_unorm >> 24);
-}
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 28b631f73..2e69e270f 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -5,13 +5,9 @@
#include ::CopyImage(ImageId dst_id, ImageId src_id, std::vector ::BlitImages TextureCache ::GetBlitImages(
ImageId src_id;
do {
has_deleted_images = false;
- dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
- const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
- DeduceBlitImages(dst_info, src_info, dst_image, src_image);
- ASSERT(GetFormatType(dst_info.format) == GetFormatType(src_info.format));
- RelaxedOptions find_options{};
- if (src_info.num_samples > 1) {
- // it's a resolve, we must enforce the same format.
- find_options = RelaxedOptions::ForceBrokenViews;
- }
- src_id = FindOrInsertImage(src_info, src_addr, find_options);
- dst_id = FindOrInsertImage(dst_info, dst_addr, find_options);
+ if (src_image && src_image->info.num_samples > 1) {
+ RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews};
+ src_id = FindOrInsertImage(src_info, src_addr, find_options);
+ dst_id = FindOrInsertImage(dst_info, dst_addr, find_options);
+ if (has_deleted_images) {
+ continue;
+ }
+ }
+ dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
+ if (!src_id) {
+ src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
+ }
+ if (!dst_id) {
+ dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
+ }
} while (has_deleted_images);
return BlitImages{
.dst_id = dst_id,
--
cgit v1.2.3
From ecefc932e64bf4ab8442d3c9808a2e54429e7001 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Fri, 26 Nov 2021 21:36:53 +0100
Subject: Texture Cache: Redesigning the blitting system (again).
---
src/video_core/texture_cache/texture_cache.h | 52 +++++++++++++++++++----
src/video_core/texture_cache/texture_cache_base.h | 3 +-
src/video_core/texture_cache/util.cpp | 32 ++++++++------
3 files changed, 64 insertions(+), 23 deletions(-)
(limited to 'src')
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 570da2b04..f24de9a38 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -472,7 +472,7 @@ template ::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy) {
- const BlitImages images = GetBlitImages(dst, src);
+ const BlitImages images = GetBlitImages(dst, src, copy);
const ImageId dst_id = images.dst_id;
const ImageId src_id = images.src_id;
@@ -762,12 +762,15 @@ ImageId TextureCache ::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
const bool broken_views =
runtime.HasBrokenTextureViewFormats() || True(options & RelaxedOptions::ForceBrokenViews);
const bool native_bgr = runtime.HasNativeBgr();
- ImageId image_id;
+ const bool flexible_formats = True(options & RelaxedOptions::Format);
+ ImageId image_id{};
+ boost::container::small_vector ::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
IsPitchLinearSameSize(existing, info, strict_size) &&
IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) {
image_id = existing_image_id;
- return true;
+ image_ids.push_back(existing_image_id);
+ return !flexible_formats && existing.format == info.format;
}
} else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views,
native_bgr)) {
image_id = existing_image_id;
- return true;
+ image_ids.push_back(existing_image_id);
+ return !flexible_formats && existing_image.info.format == info.format;
}
return false;
};
ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
- return image_id;
+ if (image_ids.size() <= 1) [[likely]] {
+ return image_id;
+ }
+ auto image_ids_compare = [this](ImageId a, ImageId b) {
+ auto& image_a = slot_images[a];
+ auto& image_b = slot_images[b];
+ return image_a.modification_tick < image_b.modification_tick;
+ };
+ return *std::ranges::max_element(image_ids, image_ids_compare);
}
template ::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
template ::BlitImages TextureCache ::GetBlitImages(
- const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
+ const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Config& copy) {
+
static constexpr auto FIND_OPTIONS = RelaxedOptions::Samples;
const GPUVAddr dst_addr = dst.Address();
const GPUVAddr src_addr = src.Address();
ImageInfo dst_info(dst);
ImageInfo src_info(src);
+ const bool can_be_depth_blit =
+ dst_info.format == src_info.format && copy.filter == Tegra::Engines::Fermi2D::Filter::Point;
ImageId dst_id;
ImageId src_id;
+ RelaxedOptions try_options = FIND_OPTIONS;
+ if (can_be_depth_blit) {
+ try_options |= RelaxedOptions::Format;
+ }
do {
has_deleted_images = false;
- src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
+ src_id = FindImage(src_info, src_addr, try_options);
+ dst_id = FindImage(dst_info, dst_addr, try_options);
const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
if (src_image && src_image->info.num_samples > 1) {
RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews};
@@ -1097,8 +1119,15 @@ typename TextureCache ::BlitImages TextureCache ::GetBlitImages(
if (has_deleted_images) {
continue;
}
+ break;
+ }
+ if (can_be_depth_blit) {
+ const ImageBase* const dst_image = src_id ? &slot_images[src_id] : nullptr;
+ DeduceBlitImages(dst_info, src_info, dst_image, src_image);
+ if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
+ continue;
+ }
}
- dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
if (!src_id) {
src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
}
@@ -1106,6 +1135,11 @@ typename TextureCache ::BlitImages TextureCache ::GetBlitImages(
dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
}
} while (has_deleted_images);
+ if (GetFormatType(dst_info.format) != SurfaceType::ColorTexture) {
+ // Make sure the images are depth and/or stencil textures.
+ src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{});
+ dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{});
+ }
return BlitImages{
.dst_id = dst_id,
.src_id = src_id,
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 643ad811c..7107887a6 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -252,7 +252,8 @@ private:
/// Return a blit image pair from the given guest blit parameters
[[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
- const Tegra::Engines::Fermi2D::Surface& src);
+ const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Config& copy);
/// Find or create a sampler from a guest descriptor sampler
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 9b1613008..7bd31b211 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -1151,19 +1151,25 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr
void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
const ImageBase* src) {
- bool is_resolve = false;
- if (src) {
- is_resolve = src->info.num_samples > 1;
- src_info.num_samples = src->info.num_samples;
- src_info.size.width = src->info.size.width;
- src_info.size.height = src->info.size.height;
- }
- if (dst) {
- dst_info.num_samples = dst->info.num_samples;
- dst_info.size.width = dst->info.size.width;
- dst_info.size.height = dst->info.size.height;
- }
- ASSERT(!is_resolve || dst_info.format == src_info.format);
+ const auto original_dst_format = dst_info.format;
+ if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
+ src_info.format = src->info.format;
+ }
+ if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
+ dst_info.format = dst->info.format;
+ }
+ if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
+ dst_info.format = src->info.format;
+ }
+ if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
+ if (src) {
+ if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) {
+ dst_info.format = original_dst_format;
+ }
+ } else {
+ src_info.format = dst->info.format;
+ }
+ }
}
u32 MapSizeBytes(const ImageBase& image) {
--
cgit v1.2.3
From 5a3463bc2b1489dda6b5fe90110f9260f6b68463 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Sat, 27 Nov 2021 23:49:56 +0100
Subject: Texture Cache: Secure insertions against deletions.
---
src/video_core/texture_cache/texture_cache.h | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
(limited to 'src')
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index f24de9a38..565b99254 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1137,8 +1137,11 @@ typename TextureCache ::BlitImages TextureCache ::GetBlitImages(
} while (has_deleted_images);
if (GetFormatType(dst_info.format) != SurfaceType::ColorTexture) {
// Make sure the images are depth and/or stencil textures.
- src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{});
- dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{});
+ do {
+ has_deleted_images = false;
+ src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{});
+ dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{});
+ } while (has_deleted_images);
}
return BlitImages{
.dst_id = dst_id,
@@ -1196,7 +1199,14 @@ template ::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
bool is_clear) {
const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
- const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
+ ImageId image_id{};
+ bool delete_state = has_deleted_images;
+ do {
+ has_deleted_images = false;
+ image_id = FindOrInsertImage(info, gpu_addr, options);
+ delete_state |= has_deleted_images;
+ } while (has_deleted_images);
+ has_deleted_images = delete_state;
if (!image_id) {
return NULL_IMAGE_VIEW_ID;
}
--
cgit v1.2.3