summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
author: Feng Chen, 2021-12-18 13:57:14 +0800
committer: GitHub, 2021-12-18 13:57:14 +0800
commit: e49184e6069a9d791d2df3c1958f5c4b1187e124 (patch)
tree: b776caf722e0be0e680f67b0ad0842628162ef1c /src/video_core
parent: Implement convert legacy to generic (diff)
parent: Merge pull request #7570 from ameerj/favorites-expanded (diff)
download: yuzu-e49184e6069a9d791d2df3c1958f5c4b1187e124.tar.gz
yuzu-e49184e6069a9d791d2df3c1958f5c4b1187e124.tar.xz
yuzu-e49184e6069a9d791d2df3c1958f5c4b1187e124.zip
Merge branch 'yuzu-emu:master' into convert_legacy
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/command_classes/codecs/codec.cpp86
-rw-r--r--src/video_core/gpu.cpp44
-rw-r--r--src/video_core/gpu.h5
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt2
-rw-r--r--src/video_core/host_shaders/convert_abgr8_to_d24s8.frag18
-rw-r--r--src/video_core/host_shaders/convert_d24s8_to_abgr8.frag23
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp63
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h12
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h1
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp155
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h25
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp9
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp202
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h14
-rw-r--r--src/video_core/shader_notify.cpp2
-rw-r--r--src/video_core/shader_notify.h2
-rw-r--r--src/video_core/surface.cpp7
-rw-r--r--src/video_core/surface.h14
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp2
-rw-r--r--src/video_core/texture_cache/formatter.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h105
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h5
-rw-r--r--src/video_core/texture_cache/types.h1
-rw-r--r--src/video_core/texture_cache/util.cpp9
-rw-r--r--src/video_core/video_core.cpp6
-rw-r--r--src/video_core/video_core.h2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp11
28 files changed, 698 insertions, 130 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 91a30fef7..6a6325e38 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,7 @@ add_subdirectory(host_shaders)
3if(LIBVA_FOUND) 3if(LIBVA_FOUND)
4 set_source_files_properties(command_classes/codecs/codec.cpp 4 set_source_files_properties(command_classes/codecs/codec.cpp
5 PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) 5 PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
6 list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
6endif() 7endif()
7 8
8add_library(video_core STATIC 9add_library(video_core STATIC
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 916277811..2a532b883 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <cstdio>
5#include <fstream> 7#include <fstream>
6#include <vector> 8#include <vector>
7#include "common/assert.h" 9#include "common/assert.h"
@@ -15,12 +17,28 @@
15 17
16extern "C" { 18extern "C" {
17#include <libavutil/opt.h> 19#include <libavutil/opt.h>
20#ifdef LIBVA_FOUND
21// for querying VAAPI driver information
22#include <libavutil/hwcontext_vaapi.h>
23#endif
18} 24}
19 25
20namespace Tegra { 26namespace Tegra {
21namespace { 27namespace {
22constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; 28constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
23constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; 29constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
// FFmpeg hardware device types to attempt for GPU decoding. CreateGpuAvDevice() walks this
// array front to back, so earlier entries take precedence over later ones. The platform
// specific entries are selected at compile time by the preprocessor checks below.
30constexpr std::array PREFERRED_GPU_DECODERS = {
31 AV_HWDEVICE_TYPE_CUDA,
32#ifdef _WIN32
33 AV_HWDEVICE_TYPE_D3D11VA,
34 AV_HWDEVICE_TYPE_DXVA2,
35#elif defined(__linux__)
36 AV_HWDEVICE_TYPE_VAAPI,
37 AV_HWDEVICE_TYPE_VDPAU,
38#endif
39 // last resort for Linux Flatpak (w/ NVIDIA)
40 AV_HWDEVICE_TYPE_VULKAN,
41};
24 42
25void AVPacketDeleter(AVPacket* ptr) { 43void AVPacketDeleter(AVPacket* ptr) {
26 av_packet_free(&ptr); 44 av_packet_free(&ptr);
@@ -59,46 +77,50 @@ Codec::~Codec() {
59 av_buffer_unref(&av_gpu_decoder); 77 av_buffer_unref(&av_gpu_decoder);
60} 78}
61 79
80// List all the currently available hwcontext in ffmpeg
81static std::vector<AVHWDeviceType> ListSupportedContexts() {
82 std::vector<AVHWDeviceType> contexts{};
83 AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
84 do {
85 current_device_type = av_hwdevice_iterate_types(current_device_type);
86 contexts.push_back(current_device_type);
87 } while (current_device_type != AV_HWDEVICE_TYPE_NONE);
88 return contexts;
89}
90
62bool Codec::CreateGpuAvDevice() { 91bool Codec::CreateGpuAvDevice() {
63#if defined(LIBVA_FOUND)
64 static constexpr std::array<const char*, 3> VAAPI_DRIVERS = {
65 "i915",
66 "iHD",
67 "amdgpu",
68 };
69 AVDictionary* hwdevice_options = nullptr;
70 av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
71 for (const auto& driver : VAAPI_DRIVERS) {
72 av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
73 const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI,
74 nullptr, hwdevice_options, 0);
75 if (hwdevice_error >= 0) {
76 LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
77 av_dict_free(&hwdevice_options);
78 av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI;
79 return true;
80 }
81 LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
82 }
83 LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
84 av_dict_free(&hwdevice_options);
85#endif
86 static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; 92 static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
87 static constexpr std::array GPU_DECODER_TYPES{ 93 static const auto supported_contexts = ListSupportedContexts();
88 AV_HWDEVICE_TYPE_CUDA, 94 for (const auto& type : PREFERRED_GPU_DECODERS) {
89#ifdef _WIN32 95 if (std::none_of(supported_contexts.begin(), supported_contexts.end(),
90 AV_HWDEVICE_TYPE_D3D11VA, 96 [&type](const auto& context) { return context == type; })) {
91#else 97 LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
92 AV_HWDEVICE_TYPE_VDPAU, 98 continue;
93#endif 99 }
94 };
95 for (const auto& type : GPU_DECODER_TYPES) {
96 const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); 100 const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
97 if (hwdevice_res < 0) { 101 if (hwdevice_res < 0) {
98 LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", 102 LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
99 av_hwdevice_get_type_name(type), hwdevice_res); 103 av_hwdevice_get_type_name(type), hwdevice_res);
100 continue; 104 continue;
101 } 105 }
106#ifdef LIBVA_FOUND
107 if (type == AV_HWDEVICE_TYPE_VAAPI) {
108 // we need to determine if this is an impersonated VAAPI driver
109 AVHWDeviceContext* hwctx =
110 static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data));
111 AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
112 const char* vendor_name = vaQueryVendorString(vactx->display);
113 if (strstr(vendor_name, "VDPAU backend")) {
114 // VDPAU impersonated VAAPI impl's are super buggy, we need to skip them
115 LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver");
116 continue;
117 } else {
118 // according to some user testing, certain vaapi driver (Intel?) could be buggy
119 // so let's log the driver name which may help the developers/supporters
120 LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name);
121 }
122 }
123#endif
102 for (int i = 0;; i++) { 124 for (int i = 0;; i++) {
103 const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); 125 const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
104 if (!config) { 126 if (!config) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ab7c21a49..8788f5148 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -185,16 +185,6 @@ struct GPU::Impl {
185 return *dma_pusher; 185 return *dma_pusher;
186 } 186 }
187 187
188 /// Returns a reference to the GPU CDMA pusher.
189 [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() {
190 return *cdma_pusher;
191 }
192
193 /// Returns a const reference to the GPU CDMA pusher.
194 [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const {
195 return *cdma_pusher;
196 }
197
198 /// Returns a reference to the underlying renderer. 188 /// Returns a reference to the underlying renderer.
199 [[nodiscard]] VideoCore::RendererBase& Renderer() { 189 [[nodiscard]] VideoCore::RendererBase& Renderer() {
200 return *renderer; 190 return *renderer;
@@ -338,25 +328,27 @@ struct GPU::Impl {
338 } 328 }
339 329
340 /// Push GPU command buffer entries to be processed 330 /// Push GPU command buffer entries to be processed
341 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { 331 void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
342 if (!use_nvdec) { 332 if (!use_nvdec) {
343 return; 333 return;
344 } 334 }
345 335
346 if (!cdma_pusher) { 336 if (!cdma_pushers.contains(id)) {
347 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu); 337 cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(gpu));
348 } 338 }
349 339
350 // SubmitCommandBuffer would make the nvdec operations async, this is not currently working 340 // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
351 // TODO(ameerj): RE proper async nvdec operation 341 // TODO(ameerj): RE proper async nvdec operation
352 // gpu_thread.SubmitCommandBuffer(std::move(entries)); 342 // gpu_thread.SubmitCommandBuffer(std::move(entries));
353 343 cdma_pushers[id]->ProcessEntries(std::move(entries));
354 cdma_pusher->ProcessEntries(std::move(entries));
355 } 344 }
356 345
357 /// Frees the CDMAPusher instance to free up resources 346 /// Frees the CDMAPusher instance to free up resources
358 void ClearCdmaInstance() { 347 void ClearCdmaInstance(u32 id) {
359 cdma_pusher.reset(); 348 const auto iter = cdma_pushers.find(id);
349 if (iter != cdma_pushers.end()) {
350 cdma_pushers.erase(iter);
351 }
360 } 352 }
361 353
362 /// Swap buffers (render frame) 354 /// Swap buffers (render frame)
@@ -659,7 +651,7 @@ struct GPU::Impl {
659 Core::System& system; 651 Core::System& system;
660 std::unique_ptr<Tegra::MemoryManager> memory_manager; 652 std::unique_ptr<Tegra::MemoryManager> memory_manager;
661 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 653 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
662 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; 654 std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
663 std::unique_ptr<VideoCore::RendererBase> renderer; 655 std::unique_ptr<VideoCore::RendererBase> renderer;
664 VideoCore::RasterizerInterface* rasterizer = nullptr; 656 VideoCore::RasterizerInterface* rasterizer = nullptr;
665 const bool use_nvdec; 657 const bool use_nvdec;
@@ -811,14 +803,6 @@ const Tegra::DmaPusher& GPU::DmaPusher() const {
811 return impl->DmaPusher(); 803 return impl->DmaPusher();
812} 804}
813 805
814Tegra::CDmaPusher& GPU::CDmaPusher() {
815 return impl->CDmaPusher();
816}
817
818const Tegra::CDmaPusher& GPU::CDmaPusher() const {
819 return impl->CDmaPusher();
820}
821
822VideoCore::RendererBase& GPU::Renderer() { 806VideoCore::RendererBase& GPU::Renderer() {
823 return impl->Renderer(); 807 return impl->Renderer();
824} 808}
@@ -887,12 +871,12 @@ void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
887 impl->PushGPUEntries(std::move(entries)); 871 impl->PushGPUEntries(std::move(entries));
888} 872}
889 873
890void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { 874void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
891 impl->PushCommandBuffer(entries); 875 impl->PushCommandBuffer(id, entries);
892} 876}
893 877
894void GPU::ClearCdmaInstance() { 878void GPU::ClearCdmaInstance(u32 id) {
895 impl->ClearCdmaInstance(); 879 impl->ClearCdmaInstance(id);
896} 880}
897 881
898void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 882void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 05e5c94f3..500411176 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -83,6 +83,7 @@ enum class DepthFormat : u32 {
83 S8_UINT_Z24_UNORM = 0x14, 83 S8_UINT_Z24_UNORM = 0x14,
84 D24X8_UNORM = 0x15, 84 D24X8_UNORM = 0x15,
85 D24S8_UNORM = 0x16, 85 D24S8_UNORM = 0x16,
86 S8_UINT = 0x17,
86 D24C8_UNORM = 0x18, 87 D24C8_UNORM = 0x18,
87 D32_FLOAT_S8X24_UINT = 0x19, 88 D32_FLOAT_S8X24_UINT = 0x19,
88}; 89};
@@ -241,10 +242,10 @@ public:
241 void PushGPUEntries(Tegra::CommandList&& entries); 242 void PushGPUEntries(Tegra::CommandList&& entries);
242 243
243 /// Push GPU command buffer entries to be processed 244 /// Push GPU command buffer entries to be processed
244 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries); 245 void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
245 246
246 /// Frees the CDMAPusher instance to free up resources 247 /// Frees the CDMAPusher instance to free up resources
247 void ClearCdmaInstance(); 248 void ClearCdmaInstance(u32 id);
248 249
249 /// Swap buffers (render frame) 250 /// Swap buffers (render frame)
250 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); 251 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index d779a967a..fd3e41434 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -10,6 +10,8 @@ set(SHADER_FILES
10 astc_decoder.comp 10 astc_decoder.comp
11 block_linear_unswizzle_2d.comp 11 block_linear_unswizzle_2d.comp
12 block_linear_unswizzle_3d.comp 12 block_linear_unswizzle_3d.comp
13 convert_abgr8_to_d24s8.frag
14 convert_d24s8_to_abgr8.frag
13 convert_depth_to_float.frag 15 convert_depth_to_float.frag
14 convert_float_to_depth.frag 16 convert_float_to_depth.frag
15 full_screen_triangle.vert 17 full_screen_triangle.vert
diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
new file mode 100644
index 000000000..ea055ddad
--- /dev/null
+++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
@@ -0,0 +1,18 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6#extension GL_ARB_shader_stencil_export : require
7
8layout(binding = 0) uniform sampler2D color_texture;
9
// Reinterprets one ABGR8 color texel as a packed 32-bit depth/stencil word.
// The four 8-bit channels are reassembled as (A << 24) | (B << 16) | (G << 8) | R; the low
// 24 bits of that word are written out as normalized depth and the top 8 bits as stencil.
void main() {
    ivec2 texel = ivec2(gl_FragCoord.xy);

    // Scale the normalized channels back to their 0..255 integer values.
    vec4 abgr = texelFetch(color_texture, texel, 0).abgr;
    uvec4 channels = uvec4(abgr * (exp2(8.0) - 1.0));

    uint depth_stencil = (channels.x << 24) | (channels.y << 16) | (channels.z << 8) | channels.w;
    uint depth_bits = depth_stencil & 0x00FFFFFFu;
    uint stencil_bits = depth_stencil >> 24;

    gl_FragDepth = float(depth_bits) / (exp2(24.0) - 1.0);
    gl_FragStencilRefARB = int(stencil_bits);
}
diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
new file mode 100644
index 000000000..94368fb59
--- /dev/null
+++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
@@ -0,0 +1,23 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6
7layout(binding = 0) uniform sampler2D depth_tex;
8layout(binding = 1) uniform isampler2D stencil_tex;
9
10layout(location = 0) out vec4 color;
11
// Packs one depth/stencil texel into an ABGR8 color.
// Stencil is written to alpha. Depth is scaled to a 32-bit integer and its three most
// significant bytes are written to blue, green and red, most significant byte first.
void main() {
    ivec2 coord = ivec2(gl_FragCoord.xy);

    // coord is an integer texel position, so the texels are fetched directly with
    // texelFetch(). textureLod() would take the same values as normalized sampling
    // coordinates instead of texel indices.
    highp uint depth_val = uint(texelFetch(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
    lowp uint stencil_val = uint(texelFetch(stencil_tex, coord, 0).r);

    highp uvec4 components =
        uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
    color.abgr = vec4(components) / (exp2(8.0) - 1.0);
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 6956535e5..14e6522f2 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -9,6 +9,7 @@
9 9
10#include <glad/glad.h> 10#include <glad/glad.h>
11 11
12#include "common/bit_util.h"
12#include "common/literals.h" 13#include "common/literals.h"
13#include "common/settings.h" 14#include "common/settings.h"
14#include "video_core/renderer_opengl/gl_device.h" 15#include "video_core/renderer_opengl/gl_device.h"
@@ -148,6 +149,8 @@ GLenum AttachmentType(PixelFormat format) {
148 switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { 149 switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) {
149 case SurfaceType::Depth: 150 case SurfaceType::Depth:
150 return GL_DEPTH_ATTACHMENT; 151 return GL_DEPTH_ATTACHMENT;
152 case SurfaceType::Stencil:
153 return GL_STENCIL_ATTACHMENT;
151 case SurfaceType::DepthStencil: 154 case SurfaceType::DepthStencil:
152 return GL_DEPTH_STENCIL_ATTACHMENT; 155 return GL_DEPTH_STENCIL_ATTACHMENT;
153 default: 156 default:
@@ -317,13 +320,12 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
317 } 320 }
318} 321}
319 322
320OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) { 323OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format,
324 GLsizei gl_num_levels) {
321 const GLenum target = ImageTarget(info); 325 const GLenum target = ImageTarget(info);
322 const GLsizei width = info.size.width; 326 const GLsizei width = info.size.width;
323 const GLsizei height = info.size.height; 327 const GLsizei height = info.size.height;
324 const GLsizei depth = info.size.depth; 328 const GLsizei depth = info.size.depth;
325 const int max_host_mip_levels = std::bit_width(info.size.width);
326 const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
327 const GLsizei num_layers = info.resources.layers; 329 const GLsizei num_layers = info.resources.layers;
328 const GLsizei num_samples = info.num_samples; 330 const GLsizei num_samples = info.num_samples;
329 331
@@ -335,10 +337,10 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
335 } 337 }
336 switch (target) { 338 switch (target) {
337 case GL_TEXTURE_1D_ARRAY: 339 case GL_TEXTURE_1D_ARRAY:
338 glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); 340 glTextureStorage2D(handle, gl_num_levels, gl_internal_format, width, num_layers);
339 break; 341 break;
340 case GL_TEXTURE_2D_ARRAY: 342 case GL_TEXTURE_2D_ARRAY:
341 glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); 343 glTextureStorage3D(handle, gl_num_levels, gl_internal_format, width, height, num_layers);
342 break; 344 break;
343 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { 345 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
344 // TODO: Where should 'fixedsamplelocations' come from? 346 // TODO: Where should 'fixedsamplelocations' come from?
@@ -348,10 +350,10 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
348 break; 350 break;
349 } 351 }
350 case GL_TEXTURE_RECTANGLE: 352 case GL_TEXTURE_RECTANGLE:
351 glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); 353 glTextureStorage2D(handle, gl_num_levels, gl_internal_format, width, height);
352 break; 354 break;
353 case GL_TEXTURE_3D: 355 case GL_TEXTURE_3D:
354 glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); 356 glTextureStorage3D(handle, gl_num_levels, gl_internal_format, width, height, depth);
355 break; 357 break;
356 case GL_TEXTURE_BUFFER: 358 case GL_TEXTURE_BUFFER:
357 UNREACHABLE(); 359 UNREACHABLE();
@@ -397,9 +399,6 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
397 return GL_R32UI; 399 return GL_R32UI;
398} 400}
399 401
400[[nodiscard]] u32 NextPow2(u32 value) {
401 return 1U << (32U - std::countl_zero(value - 1U));
402}
403} // Anonymous namespace 402} // Anonymous namespace
404 403
405ImageBufferMap::~ImageBufferMap() { 404ImageBufferMap::~ImageBufferMap() {
@@ -526,8 +525,8 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
526 } 525 }
527} 526}
528 527
529void TextureCacheRuntime::ConvertImage(Image& dst, Image& src, 528void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
530 std::span<const VideoCommon::ImageCopy> copies) { 529 std::span<const VideoCommon::ImageCopy> copies) {
531 LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); 530 LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format);
532 format_conversion_pass.ConvertImage(dst, src, copies); 531 format_conversion_pass.ConvertImage(dst, src, copies);
533} 532}
@@ -696,7 +695,9 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_,
696 gl_format = tuple.format; 695 gl_format = tuple.format;
697 gl_type = tuple.type; 696 gl_type = tuple.type;
698 } 697 }
699 texture = MakeImage(info, gl_internal_format); 698 const int max_host_mip_levels = std::bit_width(info.size.width);
699 gl_num_levels = std::min(info.resources.levels, max_host_mip_levels);
700 texture = MakeImage(info, gl_internal_format, gl_num_levels);
700 current_texture = texture.handle; 701 current_texture = texture.handle;
701 if (runtime->device.HasDebuggingToolAttached()) { 702 if (runtime->device.HasDebuggingToolAttached()) {
702 const std::string name = VideoCommon::Name(*this); 703 const std::string name = VideoCommon::Name(*this);
@@ -724,6 +725,9 @@ void Image::UploadMemory(const ImageBufferMap& map,
724 u32 current_image_height = std::numeric_limits<u32>::max(); 725 u32 current_image_height = std::numeric_limits<u32>::max();
725 726
726 for (const VideoCommon::BufferImageCopy& copy : copies) { 727 for (const VideoCommon::BufferImageCopy& copy : copies) {
728 if (copy.image_subresource.base_level >= gl_num_levels) {
729 continue;
730 }
727 if (current_row_length != copy.buffer_row_length) { 731 if (current_row_length != copy.buffer_row_length) {
728 current_row_length = copy.buffer_row_length; 732 current_row_length = copy.buffer_row_length;
729 glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); 733 glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length);
@@ -753,6 +757,9 @@ void Image::DownloadMemory(ImageBufferMap& map,
753 u32 current_image_height = std::numeric_limits<u32>::max(); 757 u32 current_image_height = std::numeric_limits<u32>::max();
754 758
755 for (const VideoCommon::BufferImageCopy& copy : copies) { 759 for (const VideoCommon::BufferImageCopy& copy : copies) {
760 if (copy.image_subresource.base_level >= gl_num_levels) {
761 continue;
762 }
756 if (current_row_length != copy.buffer_row_length) { 763 if (current_row_length != copy.buffer_row_length) {
757 current_row_length = copy.buffer_row_length; 764 current_row_length = copy.buffer_row_length;
758 glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); 765 glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
@@ -792,7 +799,7 @@ GLuint Image::StorageHandle() noexcept {
792 } 799 }
793 store_view.Create(); 800 store_view.Create();
794 glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0, 801 glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0,
795 info.resources.levels, 0, info.resources.layers); 802 gl_num_levels, 0, info.resources.layers);
796 return store_view.handle; 803 return store_view.handle;
797 default: 804 default:
798 return current_texture; 805 return current_texture;
@@ -907,6 +914,8 @@ void Image::Scale(bool up_scale) {
907 return GL_COLOR_ATTACHMENT0; 914 return GL_COLOR_ATTACHMENT0;
908 case SurfaceType::Depth: 915 case SurfaceType::Depth:
909 return GL_DEPTH_ATTACHMENT; 916 return GL_DEPTH_ATTACHMENT;
917 case SurfaceType::Stencil:
918 return GL_STENCIL_ATTACHMENT;
910 case SurfaceType::DepthStencil: 919 case SurfaceType::DepthStencil:
911 return GL_DEPTH_STENCIL_ATTACHMENT; 920 return GL_DEPTH_STENCIL_ATTACHMENT;
912 default: 921 default:
@@ -920,8 +929,10 @@ void Image::Scale(bool up_scale) {
920 return GL_COLOR_BUFFER_BIT; 929 return GL_COLOR_BUFFER_BIT;
921 case SurfaceType::Depth: 930 case SurfaceType::Depth:
922 return GL_DEPTH_BUFFER_BIT; 931 return GL_DEPTH_BUFFER_BIT;
932 case SurfaceType::Stencil:
933 return GL_STENCIL_BUFFER_BIT;
923 case SurfaceType::DepthStencil: 934 case SurfaceType::DepthStencil:
924 return GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT; 935 return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
925 default: 936 default:
926 UNREACHABLE(); 937 UNREACHABLE();
927 return GL_COLOR_BUFFER_BIT; 938 return GL_COLOR_BUFFER_BIT;
@@ -933,8 +944,10 @@ void Image::Scale(bool up_scale) {
933 return 0; 944 return 0;
934 case SurfaceType::Depth: 945 case SurfaceType::Depth:
935 return 1; 946 return 1;
936 case SurfaceType::DepthStencil: 947 case SurfaceType::Stencil:
937 return 2; 948 return 2;
949 case SurfaceType::DepthStencil:
950 return 3;
938 default: 951 default:
939 UNREACHABLE(); 952 UNREACHABLE();
940 return 0; 953 return 0;
@@ -956,7 +969,7 @@ void Image::Scale(bool up_scale) {
956 auto dst_info = info; 969 auto dst_info = info;
957 dst_info.size.width = scaled_width; 970 dst_info.size.width = scaled_width;
958 dst_info.size.height = scaled_height; 971 dst_info.size.height = scaled_height;
959 upscaled_backup = MakeImage(dst_info, gl_internal_format); 972 upscaled_backup = MakeImage(dst_info, gl_internal_format, gl_num_levels);
960 } 973 }
961 const u32 src_width = up_scale ? original_width : scaled_width; 974 const u32 src_width = up_scale ? original_width : scaled_width;
962 const u32 src_height = up_scale ? original_height : scaled_height; 975 const u32 src_height = up_scale ? original_height : scaled_height;
@@ -1264,10 +1277,20 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1264 } 1277 }
1265 1278
1266 if (const ImageView* const image_view = depth_buffer; image_view) { 1279 if (const ImageView* const image_view = depth_buffer; image_view) {
1267 if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) { 1280 switch (GetFormatType(image_view->format)) {
1281 case SurfaceType::Depth:
1282 buffer_bits |= GL_DEPTH_BUFFER_BIT;
1283 break;
1284 case SurfaceType::Stencil:
1285 buffer_bits |= GL_STENCIL_BUFFER_BIT;
1286 break;
1287 case SurfaceType::DepthStencil:
1268 buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; 1288 buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
1269 } else { 1289 break;
1290 default:
1291 UNREACHABLE();
1270 buffer_bits |= GL_DEPTH_BUFFER_BIT; 1292 buffer_bits |= GL_DEPTH_BUFFER_BIT;
1293 break;
1271 } 1294 }
1272 const GLenum attachment = AttachmentType(image_view->format); 1295 const GLenum attachment = AttachmentType(image_view->format);
1273 AttachTexture(handle, attachment, image_view); 1296 AttachTexture(handle, attachment, image_view);
@@ -1308,7 +1331,7 @@ void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,
1308 const u32 copy_size = region.width * region.height * region.depth * img_bpp; 1331 const u32 copy_size = region.width * region.height * region.depth * img_bpp;
1309 if (pbo_size < copy_size) { 1332 if (pbo_size < copy_size) {
1310 intermediate_pbo.Create(); 1333 intermediate_pbo.Create();
1311 pbo_size = NextPow2(copy_size); 1334 pbo_size = Common::NextPow2(copy_size);
1312 glNamedBufferData(intermediate_pbo.handle, pbo_size, nullptr, GL_STREAM_COPY); 1335 glNamedBufferData(intermediate_pbo.handle, pbo_size, nullptr, GL_STREAM_COPY);
1313 } 1336 }
1314 // Copy from source to PBO 1337 // Copy from source to PBO
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 578f8d523..37d5e6a6b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -84,9 +84,13 @@ public:
84 84
85 u64 GetDeviceLocalMemory() const; 85 u64 GetDeviceLocalMemory() const;
86 86
// Unconditionally returns true; neither image argument is inspected.
// NOTE(review): presumably this tells the texture cache that the OpenGL runtime can always
// reinterpret between the two images' formats - confirm against the caller.
87 bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) {
88 return true;
89 }
90
87 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); 91 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
88 92
89 void ConvertImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); 93 void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
90 94
91 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { 95 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) {
92 UNIMPLEMENTED(); 96 UNIMPLEMENTED();
@@ -164,8 +168,8 @@ private:
164 168
165 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; 169 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
166 170
167 std::array<OGLFramebuffer, 3> rescale_draw_fbos; 171 std::array<OGLFramebuffer, 4> rescale_draw_fbos;
168 std::array<OGLFramebuffer, 3> rescale_read_fbos; 172 std::array<OGLFramebuffer, 4> rescale_read_fbos;
169 const Settings::ResolutionScalingInfo& resolution; 173 const Settings::ResolutionScalingInfo& resolution;
170}; 174};
171 175
@@ -221,6 +225,7 @@ private:
221 GLenum gl_internal_format = GL_NONE; 225 GLenum gl_internal_format = GL_NONE;
222 GLenum gl_format = GL_NONE; 226 GLenum gl_format = GL_NONE;
223 GLenum gl_type = GL_NONE; 227 GLenum gl_type = GL_NONE;
228 GLsizei gl_num_levels{};
224 TextureCacheRuntime* runtime{}; 229 TextureCacheRuntime* runtime{};
225 GLuint current_texture{}; 230 GLuint current_texture{};
226}; 231};
@@ -338,7 +343,6 @@ struct TextureCacheParams {
338 static constexpr bool FRAMEBUFFER_BLITS = true; 343 static constexpr bool FRAMEBUFFER_BLITS = true;
339 static constexpr bool HAS_EMULATED_COPIES = true; 344 static constexpr bool HAS_EMULATED_COPIES = true;
340 static constexpr bool HAS_DEVICE_MEMORY_INFO = true; 345 static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
341 static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = true;
342 346
343 using Runtime = OpenGL::TextureCacheRuntime; 347 using Runtime = OpenGL::TextureCacheRuntime;
344 using Image = OpenGL::Image; 348 using Image = OpenGL::Image;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 39158aa3e..daba42ed9 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -108,6 +108,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
108 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT 108 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
109 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT 109 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
110 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM 110 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
111 {GL_STENCIL_INDEX8, GL_STENCIL, GL_UNSIGNED_BYTE}, // S8_UINT
111 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT 112 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
112 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM 113 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
113 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, 114 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index b3884a4f5..9a38b6b34 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -4,6 +4,8 @@
4 4
5#include <algorithm> 5#include <algorithm>
6 6
7#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
8#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
7#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" 9#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
8#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" 10#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
9#include "video_core/host_shaders/full_screen_triangle_vert_spv.h" 11#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
@@ -354,6 +356,8 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
354 blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), 356 blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
355 convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), 357 convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
356 convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), 358 convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
359 convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
360 convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
357 linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)), 361 linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
358 nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) { 362 nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
359 if (device.IsExtShaderStencilExportSupported()) { 363 if (device.IsExtShaderStencilExportSupported()) {
@@ -448,6 +452,22 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
448 Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); 452 Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
449} 453}
450 454
455void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
456 ImageView& src_image_view, u32 up_scale, u32 down_shift) {
457 ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(),
458 convert_abgr8_to_d24s8_frag, true);
459 ConvertColor(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale,
460 down_shift);
461}
462
463void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
464 ImageView& src_image_view, u32 up_scale, u32 down_shift) {
465 ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
466 convert_d24s8_to_abgr8_frag, false);
467 ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale,
468 down_shift);
469}
470
451void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, 471void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
452 const ImageView& src_image_view, u32 up_scale, u32 down_shift) { 472 const ImageView& src_image_view, u32 up_scale, u32 down_shift) {
453 const VkPipelineLayout layout = *one_texture_pipeline_layout; 473 const VkPipelineLayout layout = *one_texture_pipeline_layout;
@@ -495,6 +515,101 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
495 scheduler.InvalidateState(); 515 scheduler.InvalidateState();
496} 516}
497 517
518void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
519 ImageView& src_image_view, u32 up_scale, u32 down_shift) {
520 const VkPipelineLayout layout = *one_texture_pipeline_layout;
521 const VkImageView src_view = src_image_view.ColorView();
522 const VkSampler sampler = *nearest_sampler;
523 const VkExtent2D extent{
524 .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
525 .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
526 };
527 scheduler.RequestRenderpass(dst_framebuffer);
528 scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift,
529 this](vk::CommandBuffer cmdbuf) {
530 const VkOffset2D offset{
531 .x = 0,
532 .y = 0,
533 };
534 const VkViewport viewport{
535 .x = 0.0f,
536 .y = 0.0f,
537 .width = static_cast<float>(extent.width),
538 .height = static_cast<float>(extent.height),
539 .minDepth = 0.0f,
540 .maxDepth = 0.0f,
541 };
542 const VkRect2D scissor{
543 .offset = offset,
544 .extent = extent,
545 };
546 const PushConstants push_constants{
547 .tex_scale = {viewport.width, viewport.height},
548 .tex_offset = {0.0f, 0.0f},
549 };
550 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
551 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
552
553 // TODO: Barriers
554 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
555 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
556 nullptr);
557 cmdbuf.SetViewport(0, viewport);
558 cmdbuf.SetScissor(0, scissor);
559 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
560 cmdbuf.Draw(3, 1, 0, 0);
561 });
562 scheduler.InvalidateState();
563}
564
565void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
566 ImageView& src_image_view, u32 up_scale, u32 down_shift) {
567 const VkPipelineLayout layout = *two_textures_pipeline_layout;
568 const VkImageView src_depth_view = src_image_view.DepthView();
569 const VkImageView src_stencil_view = src_image_view.StencilView();
570 const VkSampler sampler = *nearest_sampler;
571 const VkExtent2D extent{
572 .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
573 .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
574 };
575 scheduler.RequestRenderpass(dst_framebuffer);
576 scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, up_scale,
577 down_shift, this](vk::CommandBuffer cmdbuf) {
578 const VkOffset2D offset{
579 .x = 0,
580 .y = 0,
581 };
582 const VkViewport viewport{
583 .x = 0.0f,
584 .y = 0.0f,
585 .width = static_cast<float>(extent.width),
586 .height = static_cast<float>(extent.height),
587 .minDepth = 0.0f,
588 .maxDepth = 0.0f,
589 };
590 const VkRect2D scissor{
591 .offset = offset,
592 .extent = extent,
593 };
594 const PushConstants push_constants{
595 .tex_scale = {viewport.width, viewport.height},
596 .tex_offset = {0.0f, 0.0f},
597 };
598 const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
599 UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
600 src_stencil_view);
601 // TODO: Barriers
602 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
603 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
604 nullptr);
605 cmdbuf.SetViewport(0, viewport);
606 cmdbuf.SetScissor(0, scissor);
607 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
608 cmdbuf.Draw(3, 1, 0, 0);
609 });
610 scheduler.InvalidateState();
611}
612
498VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) { 613VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) {
499 const auto it = std::ranges::find(blit_color_keys, key); 614 const auto it = std::ranges::find(blit_color_keys, key);
500 if (it != blit_color_keys.end()) { 615 if (it != blit_color_keys.end()) {
@@ -636,4 +751,44 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend
636 }); 751 });
637} 752}
638 753
754void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
755 vk::ShaderModule& module, bool is_target_depth,
756 bool single_texture) {
757 if (pipeline) {
758 return;
759 }
760 const std::array stages = MakeStages(*full_screen_vert, *module);
761 pipeline = device.GetLogical().CreateGraphicsPipeline({
762 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
763 .pNext = nullptr,
764 .flags = 0,
765 .stageCount = static_cast<u32>(stages.size()),
766 .pStages = stages.data(),
767 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
768 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
769 .pTessellationState = nullptr,
770 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
771 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
772 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
773 .pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr,
774 .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
775 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
776 .layout = single_texture ? *one_texture_pipeline_layout : *two_textures_pipeline_layout,
777 .renderPass = renderpass,
778 .subpass = 0,
779 .basePipelineHandle = VK_NULL_HANDLE,
780 .basePipelineIndex = 0,
781 });
782}
783
784void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
785 vk::ShaderModule& module, bool single_texture) {
786 ConvertPipelineEx(pipeline, renderpass, module, false, single_texture);
787}
788
789void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
790 vk::ShaderModule& module, bool single_texture) {
791 ConvertPipelineEx(pipeline, renderpass, module, true, single_texture);
792}
793
639} // namespace Vulkan 794} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index d77f76678..b1a717090 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -56,10 +56,22 @@ public:
56 void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, 56 void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
57 u32 up_scale, u32 down_shift); 57 u32 up_scale, u32 down_shift);
58 58
59 void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
60 u32 up_scale, u32 down_shift);
61
62 void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
63 u32 up_scale, u32 down_shift);
64
59private: 65private:
60 void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, 66 void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
61 const ImageView& src_image_view, u32 up_scale, u32 down_shift); 67 const ImageView& src_image_view, u32 up_scale, u32 down_shift);
62 68
69 void ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
70 ImageView& src_image_view, u32 up_scale, u32 down_shift);
71
72 void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
73 ImageView& src_image_view, u32 up_scale, u32 down_shift);
74
63 [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key); 75 [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
64 76
65 [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); 77 [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
@@ -68,6 +80,15 @@ private:
68 80
69 void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); 81 void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
70 82
83 void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
84 vk::ShaderModule& module, bool is_target_depth, bool single_texture);
85
86 void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
87 vk::ShaderModule& module, bool single_texture);
88
89 void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
90 vk::ShaderModule& module, bool single_texture);
91
71 const Device& device; 92 const Device& device;
72 VKScheduler& scheduler; 93 VKScheduler& scheduler;
73 StateTracker& state_tracker; 94 StateTracker& state_tracker;
@@ -83,6 +104,8 @@ private:
83 vk::ShaderModule blit_depth_stencil_frag; 104 vk::ShaderModule blit_depth_stencil_frag;
84 vk::ShaderModule convert_depth_to_float_frag; 105 vk::ShaderModule convert_depth_to_float_frag;
85 vk::ShaderModule convert_float_to_depth_frag; 106 vk::ShaderModule convert_float_to_depth_frag;
107 vk::ShaderModule convert_abgr8_to_d24s8_frag;
108 vk::ShaderModule convert_d24s8_to_abgr8_frag;
86 vk::Sampler linear_sampler; 109 vk::Sampler linear_sampler;
87 vk::Sampler nearest_sampler; 110 vk::Sampler nearest_sampler;
88 111
@@ -94,6 +117,8 @@ private:
94 vk::Pipeline convert_r32_to_d32_pipeline; 117 vk::Pipeline convert_r32_to_d32_pipeline;
95 vk::Pipeline convert_d16_to_r16_pipeline; 118 vk::Pipeline convert_d16_to_r16_pipeline;
96 vk::Pipeline convert_r16_to_d16_pipeline; 119 vk::Pipeline convert_r16_to_d16_pipeline;
120 vk::Pipeline convert_abgr8_to_d24s8_pipeline;
121 vk::Pipeline convert_d24s8_to_abgr8_pipeline;
97}; 122};
98 123
99} // namespace Vulkan 124} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 68a23b602..751e4792b 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -162,7 +162,7 @@ struct FormatTuple {
162 {VK_FORMAT_UNDEFINED}, // R16_SINT 162 {VK_FORMAT_UNDEFINED}, // R16_SINT
163 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM 163 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
164 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT 164 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
165 {VK_FORMAT_UNDEFINED}, // R16G16_UINT 165 {VK_FORMAT_R16G16_UINT, Attachable | Storage}, // R16G16_UINT
166 {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT 166 {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
167 {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM 167 {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
168 {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT 168 {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
@@ -176,8 +176,8 @@ struct FormatTuple {
176 {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT 176 {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT
177 {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT 177 {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT
178 {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM 178 {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM
179 {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM 179 {VK_FORMAT_ASTC_8x5_UNORM_BLOCK}, // ASTC_2D_8X5_UNORM
180 {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM 180 {VK_FORMAT_ASTC_5x4_UNORM_BLOCK}, // ASTC_2D_5X4_UNORM
181 {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB 181 {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB
182 {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB 182 {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB
183 {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB 183 {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB
@@ -208,6 +208,9 @@ struct FormatTuple {
208 {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT 208 {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
209 {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM 209 {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM
210 210
211 // Stencil formats
212 {VK_FORMAT_S8_UINT, Attachable}, // S8_UINT
213
211 // DepthStencil formats 214 // DepthStencil formats
212 {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT 215 {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT
213 {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated) 216 {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated)
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 407fd2a15..197cba8e3 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -8,6 +8,7 @@
8#include <vector> 8#include <vector>
9 9
10#include "common/bit_cast.h" 10#include "common/bit_cast.h"
11#include "common/bit_util.h"
11#include "common/settings.h" 12#include "common/settings.h"
12 13
13#include "video_core/engines/fermi_2d.h" 14#include "video_core/engines/fermi_2d.h"
@@ -102,6 +103,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
102 usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; 103 usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
103 break; 104 break;
104 case VideoCore::Surface::SurfaceType::Depth: 105 case VideoCore::Surface::SurfaceType::Depth:
106 case VideoCore::Surface::SurfaceType::Stencil:
105 case VideoCore::Surface::SurfaceType::DepthStencil: 107 case VideoCore::Surface::SurfaceType::DepthStencil:
106 usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; 108 usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
107 break; 109 break;
@@ -173,6 +175,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
173 return VK_IMAGE_ASPECT_COLOR_BIT; 175 return VK_IMAGE_ASPECT_COLOR_BIT;
174 case VideoCore::Surface::SurfaceType::Depth: 176 case VideoCore::Surface::SurfaceType::Depth:
175 return VK_IMAGE_ASPECT_DEPTH_BIT; 177 return VK_IMAGE_ASPECT_DEPTH_BIT;
178 case VideoCore::Surface::SurfaceType::Stencil:
179 return VK_IMAGE_ASPECT_STENCIL_BIT;
176 case VideoCore::Surface::SurfaceType::DepthStencil: 180 case VideoCore::Surface::SurfaceType::DepthStencil:
177 return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; 181 return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
178 default: 182 default:
@@ -195,6 +199,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
195 case PixelFormat::D16_UNORM: 199 case PixelFormat::D16_UNORM:
196 case PixelFormat::D32_FLOAT: 200 case PixelFormat::D32_FLOAT:
197 return VK_IMAGE_ASPECT_DEPTH_BIT; 201 return VK_IMAGE_ASPECT_DEPTH_BIT;
202 case PixelFormat::S8_UINT:
203 return VK_IMAGE_ASPECT_STENCIL_BIT;
198 default: 204 default:
199 return VK_IMAGE_ASPECT_COLOR_BIT; 205 return VK_IMAGE_ASPECT_COLOR_BIT;
200 } 206 }
@@ -308,6 +314,19 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
308 }; 314 };
309} 315}
310 316
317[[nodiscard]] VkBufferImageCopy MakeBufferImageCopy(const VideoCommon::ImageCopy& copy, bool is_src,
318 VkImageAspectFlags aspect_mask) noexcept {
319 return VkBufferImageCopy{
320 .bufferOffset = 0,
321 .bufferRowLength = 0,
322 .bufferImageHeight = 0,
323 .imageSubresource = MakeImageSubresourceLayers(
324 is_src ? copy.src_subresource : copy.dst_subresource, aspect_mask),
325 .imageOffset = MakeOffset3D(is_src ? copy.src_offset : copy.dst_offset),
326 .imageExtent = MakeExtent3D(copy.extent),
327 };
328}
329
311[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( 330[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
312 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { 331 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
313 std::vector<VkBufferCopy> result(copies.size()); 332 std::vector<VkBufferCopy> result(copies.size());
@@ -754,6 +773,173 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
754 return staging_buffer_pool.Request(size, MemoryUsage::Download); 773 return staging_buffer_pool.Request(size, MemoryUsage::Download);
755} 774}
756 775
776bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
777 if (VideoCore::Surface::GetFormatType(dst.info.format) ==
778 VideoCore::Surface::SurfaceType::DepthStencil &&
779 !device.IsExtShaderStencilExportSupported()) {
780 return true;
781 }
782 if (VideoCore::Surface::GetFormatType(src.info.format) ==
783 VideoCore::Surface::SurfaceType::DepthStencil &&
784 !device.IsExtShaderStencilExportSupported()) {
785 return true;
786 }
787 if (dst.info.format == PixelFormat::D32_FLOAT_S8_UINT ||
788 src.info.format == PixelFormat::D32_FLOAT_S8_UINT) {
789 return true;
790 }
791 return false;
792}
793
794VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
795 const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL);
796 if (buffer_commits[level]) {
797 return *buffers[level];
798 }
799 const auto new_size = Common::NextPow2(needed_size);
800 static constexpr VkBufferUsageFlags flags =
801 VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
802 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
803 buffers[level] = device.GetLogical().CreateBuffer({
804 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
805 .pNext = nullptr,
806 .flags = 0,
807 .size = new_size,
808 .usage = flags,
809 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
810 .queueFamilyIndexCount = 0,
811 .pQueueFamilyIndices = nullptr,
812 });
813 buffer_commits[level] = std::make_unique<MemoryCommit>(
814 memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal));
815 return *buffers[level];
816}
817
818void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
819 std::span<const VideoCommon::ImageCopy> copies) {
820 std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
821 std::vector<VkBufferImageCopy> vk_out_copies(copies.size());
822 const VkImageAspectFlags src_aspect_mask = src.AspectMask();
823 const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
824
825 std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) {
826 return MakeBufferImageCopy(copy, true, src_aspect_mask);
827 });
828 std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) {
829 return MakeBufferImageCopy(copy, false, dst_aspect_mask);
830 });
831 const u32 img_bpp = BytesPerBlock(src.info.format);
832 size_t total_size = 0;
833 for (const auto& copy : copies) {
834 total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp;
835 }
836 const VkBuffer copy_buffer = GetTemporaryBuffer(total_size);
837 const VkImage dst_image = dst.Handle();
838 const VkImage src_image = src.Handle();
839 scheduler.RequestOutsideRenderPassOperationContext();
840 scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask,
841 vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) {
842 RangedBarrierRange dst_range;
843 RangedBarrierRange src_range;
844 for (const VkBufferImageCopy& copy : vk_in_copies) {
845 src_range.AddLayers(copy.imageSubresource);
846 }
847 for (const VkBufferImageCopy& copy : vk_out_copies) {
848 dst_range.AddLayers(copy.imageSubresource);
849 }
850 static constexpr VkMemoryBarrier READ_BARRIER{
851 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
852 .pNext = nullptr,
853 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
854 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
855 };
856 static constexpr VkMemoryBarrier WRITE_BARRIER{
857 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
858 .pNext = nullptr,
859 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
860 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
861 };
862 const std::array pre_barriers{
863 VkImageMemoryBarrier{
864 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
865 .pNext = nullptr,
866 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
867 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
868 VK_ACCESS_TRANSFER_WRITE_BIT,
869 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
870 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
871 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
872 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
873 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
874 .image = src_image,
875 .subresourceRange = src_range.SubresourceRange(src_aspect_mask),
876 },
877 };
878 const std::array middle_in_barrier{
879 VkImageMemoryBarrier{
880 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
881 .pNext = nullptr,
882 .srcAccessMask = 0,
883 .dstAccessMask = 0,
884 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
885 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
886 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
887 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
888 .image = src_image,
889 .subresourceRange = src_range.SubresourceRange(src_aspect_mask),
890 },
891 };
892 const std::array middle_out_barrier{
893 VkImageMemoryBarrier{
894 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
895 .pNext = nullptr,
896 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
897 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
898 VK_ACCESS_TRANSFER_WRITE_BIT,
899 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
900 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
901 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
902 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
903 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
904 .image = dst_image,
905 .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask),
906 },
907 };
908 const std::array post_barriers{
909 VkImageMemoryBarrier{
910 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
911 .pNext = nullptr,
912 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
913 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
914 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
915 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
916 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
917 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
918 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
919 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
920 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
921 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
922 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
923 .image = dst_image,
924 .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask),
925 },
926 };
927 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
928 0, {}, {}, pre_barriers);
929
930 cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer,
931 vk_in_copies);
932 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
933 0, WRITE_BARRIER, nullptr, middle_in_barrier);
934
935 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
936 0, READ_BARRIER, {}, middle_out_barrier);
937 cmdbuf.CopyBufferToImage(copy_buffer, dst_image, VK_IMAGE_LAYOUT_GENERAL, vk_out_copies);
938 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
939 0, {}, {}, post_barriers);
940 });
941}
942
757void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, 943void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
758 const Region2D& dst_region, const Region2D& src_region, 944 const Region2D& dst_region, const Region2D& src_region,
759 Tegra::Engines::Fermi2D::Filter filter, 945 Tegra::Engines::Fermi2D::Filter filter,
@@ -881,6 +1067,11 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
881 return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift); 1067 return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift);
882 } 1068 }
883 break; 1069 break;
1070 case PixelFormat::A8B8G8R8_UNORM:
1071 if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
1072 return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift);
1073 }
1074 break;
884 case PixelFormat::R32_FLOAT: 1075 case PixelFormat::R32_FLOAT:
885 if (src_view.format == PixelFormat::D32_FLOAT) { 1076 if (src_view.format == PixelFormat::D32_FLOAT) {
886 return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); 1077 return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift);
@@ -891,6 +1082,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
891 return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift); 1082 return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift);
892 } 1083 }
893 break; 1084 break;
1085 case PixelFormat::S8_UINT_D24_UNORM:
1086 return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift);
1087 break;
894 case PixelFormat::D32_FLOAT: 1088 case PixelFormat::D32_FLOAT:
895 if (src_view.format == PixelFormat::R32_FLOAT) { 1089 if (src_view.format == PixelFormat::R32_FLOAT) {
896 return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift); 1090 return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift);
@@ -1386,6 +1580,14 @@ VkImageView ImageView::StencilView() {
1386 return *stencil_view; 1580 return *stencil_view;
1387} 1581}
1388 1582
1583VkImageView ImageView::ColorView() {
1584 if (color_view) {
1585 return *color_view;
1586 }
1587 color_view = MakeView(VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_ASPECT_COLOR_BIT);
1588 return *color_view;
1589}
1590
1389VkImageView ImageView::StorageView(Shader::TextureType texture_type, 1591VkImageView ImageView::StorageView(Shader::TextureType texture_type,
1390 Shader::ImageFormat image_format) { 1592 Shader::ImageFormat image_format) {
1391 if (image_format == Shader::ImageFormat::Typeless) { 1593 if (image_format == Shader::ImageFormat::Typeless) {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f5f8f9a74..753e3e8a1 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -61,6 +61,10 @@ public:
61 61
62 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); 62 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
63 63
64 bool ShouldReinterpret(Image& dst, Image& src);
65
66 void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
67
64 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); 68 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled);
65 69
66 bool CanAccelerateImageUpload(Image&) const noexcept { 70 bool CanAccelerateImageUpload(Image&) const noexcept {
@@ -82,6 +86,8 @@ public:
82 return true; 86 return true;
83 } 87 }
84 88
89 [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
90
85 const Device& device; 91 const Device& device;
86 VKScheduler& scheduler; 92 VKScheduler& scheduler;
87 MemoryAllocator& memory_allocator; 93 MemoryAllocator& memory_allocator;
@@ -90,6 +96,10 @@ public:
90 ASTCDecoderPass& astc_decoder_pass; 96 ASTCDecoderPass& astc_decoder_pass;
91 RenderPassCache& render_pass_cache; 97 RenderPassCache& render_pass_cache;
92 const Settings::ResolutionScalingInfo& resolution; 98 const Settings::ResolutionScalingInfo& resolution;
99
100 constexpr static size_t indexing_slots = 8 * sizeof(size_t);
101 std::array<vk::Buffer, indexing_slots> buffers{};
102 std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
93}; 103};
94 104
95class Image : public VideoCommon::ImageBase { 105class Image : public VideoCommon::ImageBase {
@@ -174,6 +184,8 @@ public:
174 184
175 [[nodiscard]] VkImageView StencilView(); 185 [[nodiscard]] VkImageView StencilView();
176 186
187 [[nodiscard]] VkImageView ColorView();
188
177 [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, 189 [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
178 Shader::ImageFormat image_format); 190 Shader::ImageFormat image_format);
179 191
@@ -214,6 +226,7 @@ private:
214 std::unique_ptr<StorageViews> storage_views; 226 std::unique_ptr<StorageViews> storage_views;
215 vk::ImageView depth_view; 227 vk::ImageView depth_view;
216 vk::ImageView stencil_view; 228 vk::ImageView stencil_view;
229 vk::ImageView color_view;
217 VkImage image_handle = VK_NULL_HANDLE; 230 VkImage image_handle = VK_NULL_HANDLE;
218 VkImageView render_target = VK_NULL_HANDLE; 231 VkImageView render_target = VK_NULL_HANDLE;
219 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; 232 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
@@ -316,7 +329,6 @@ struct TextureCacheParams {
316 static constexpr bool FRAMEBUFFER_BLITS = false; 329 static constexpr bool FRAMEBUFFER_BLITS = false;
317 static constexpr bool HAS_EMULATED_COPIES = false; 330 static constexpr bool HAS_EMULATED_COPIES = false;
318 static constexpr bool HAS_DEVICE_MEMORY_INFO = true; 331 static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
319 static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false;
320 332
321 using Runtime = Vulkan::TextureCacheRuntime; 333 using Runtime = Vulkan::TextureCacheRuntime;
322 using Image = Vulkan::Image; 334 using Image = Vulkan::Image;
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
index dc6995b46..bcaf5f575 100644
--- a/src/video_core/shader_notify.cpp
+++ b/src/video_core/shader_notify.cpp
@@ -18,7 +18,7 @@ int ShaderNotify::ShadersBuilding() noexcept {
18 const int now_complete = num_complete.load(std::memory_order::relaxed); 18 const int now_complete = num_complete.load(std::memory_order::relaxed);
19 const int now_building = num_building.load(std::memory_order::relaxed); 19 const int now_building = num_building.load(std::memory_order::relaxed);
20 if (now_complete == now_building) { 20 if (now_complete == now_building) {
21 const auto now = std::chrono::high_resolution_clock::now(); 21 const auto now = std::chrono::steady_clock::now();
22 if (completed && num_complete == num_when_completed) { 22 if (completed && num_complete == num_when_completed) {
23 if (now - complete_time > TIME_TO_STOP_REPORTING) { 23 if (now - complete_time > TIME_TO_STOP_REPORTING) {
24 report_base = now_complete; 24 report_base = now_complete;
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h
index ad363bfb5..4d8d52071 100644
--- a/src/video_core/shader_notify.h
+++ b/src/video_core/shader_notify.h
@@ -28,6 +28,6 @@ private:
28 28
29 bool completed{}; 29 bool completed{};
30 int num_when_completed{}; 30 int num_when_completed{};
31 std::chrono::high_resolution_clock::time_point complete_time; 31 std::chrono::steady_clock::time_point complete_time;
32}; 32};
33} // namespace VideoCore 33} // namespace VideoCore
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 58d262446..a36015c8c 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -82,6 +82,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
82 return PixelFormat::D32_FLOAT; 82 return PixelFormat::D32_FLOAT;
83 case Tegra::DepthFormat::D16_UNORM: 83 case Tegra::DepthFormat::D16_UNORM:
84 return PixelFormat::D16_UNORM; 84 return PixelFormat::D16_UNORM;
85 case Tegra::DepthFormat::S8_UINT:
86 return PixelFormat::S8_UINT;
85 case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT: 87 case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT:
86 return PixelFormat::D32_FLOAT_S8_UINT; 88 return PixelFormat::D32_FLOAT_S8_UINT;
87 default: 89 default:
@@ -214,6 +216,11 @@ SurfaceType GetFormatType(PixelFormat pixel_format) {
214 } 216 }
215 217
216 if (static_cast<std::size_t>(pixel_format) < 218 if (static_cast<std::size_t>(pixel_format) <
219 static_cast<std::size_t>(PixelFormat::MaxStencilFormat)) {
220 return SurfaceType::Stencil;
221 }
222
223 if (static_cast<std::size_t>(pixel_format) <
217 static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) { 224 static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
218 return SurfaceType::DepthStencil; 225 return SurfaceType::DepthStencil;
219 } 226 }
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 2ce7c7d33..33e8d24ab 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -110,8 +110,12 @@ enum class PixelFormat {
110 110
111 MaxDepthFormat, 111 MaxDepthFormat,
112 112
113 // Stencil formats
114 S8_UINT = MaxDepthFormat,
115 MaxStencilFormat,
116
113 // DepthStencil formats 117 // DepthStencil formats
114 D24_UNORM_S8_UINT = MaxDepthFormat, 118 D24_UNORM_S8_UINT = MaxStencilFormat,
115 S8_UINT_D24_UNORM, 119 S8_UINT_D24_UNORM,
116 D32_FLOAT_S8_UINT, 120 D32_FLOAT_S8_UINT,
117 121
@@ -125,8 +129,9 @@ constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max
125enum class SurfaceType { 129enum class SurfaceType {
126 ColorTexture = 0, 130 ColorTexture = 0,
127 Depth = 1, 131 Depth = 1,
128 DepthStencil = 2, 132 Stencil = 2,
129 Invalid = 3, 133 DepthStencil = 3,
134 Invalid = 4,
130}; 135};
131 136
132enum class SurfaceTarget { 137enum class SurfaceTarget {
@@ -229,6 +234,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
229 1, // E5B9G9R9_FLOAT 234 1, // E5B9G9R9_FLOAT
230 1, // D32_FLOAT 235 1, // D32_FLOAT
231 1, // D16_UNORM 236 1, // D16_UNORM
237 1, // S8_UINT
232 1, // D24_UNORM_S8_UINT 238 1, // D24_UNORM_S8_UINT
233 1, // S8_UINT_D24_UNORM 239 1, // S8_UINT_D24_UNORM
234 1, // D32_FLOAT_S8_UINT 240 1, // D32_FLOAT_S8_UINT
@@ -328,6 +334,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
328 1, // E5B9G9R9_FLOAT 334 1, // E5B9G9R9_FLOAT
329 1, // D32_FLOAT 335 1, // D32_FLOAT
330 1, // D16_UNORM 336 1, // D16_UNORM
337 1, // S8_UINT
331 1, // D24_UNORM_S8_UINT 338 1, // D24_UNORM_S8_UINT
332 1, // S8_UINT_D24_UNORM 339 1, // S8_UINT_D24_UNORM
333 1, // D32_FLOAT_S8_UINT 340 1, // D32_FLOAT_S8_UINT
@@ -427,6 +434,7 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
427 32, // E5B9G9R9_FLOAT 434 32, // E5B9G9R9_FLOAT
428 32, // D32_FLOAT 435 32, // D32_FLOAT
429 16, // D16_UNORM 436 16, // D16_UNORM
437 8, // S8_UINT
430 32, // D24_UNORM_S8_UINT 438 32, // D24_UNORM_S8_UINT
431 32, // S8_UINT_D24_UNORM 439 32, // S8_UINT_D24_UNORM
432 64, // D32_FLOAT_S8_UINT 440 64, // D32_FLOAT_S8_UINT
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index ddfb726fe..afa807d5d 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -139,6 +139,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
139 return PixelFormat::D16_UNORM; 139 return PixelFormat::D16_UNORM;
140 case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): 140 case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
141 return PixelFormat::S8_UINT_D24_UNORM; 141 return PixelFormat::S8_UINT_D24_UNORM;
142 case Hash(TextureFormat::S8D24, UINT, UNORM, UINT, UINT, LINEAR):
143 return PixelFormat::S8_UINT_D24_UNORM;
142 case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): 144 case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
143 return PixelFormat::S8_UINT_D24_UNORM; 145 return PixelFormat::S8_UINT_D24_UNORM;
144 case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): 146 case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index c6cf0583f..b2c81057b 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -194,6 +194,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
194 return "D32_FLOAT"; 194 return "D32_FLOAT";
195 case PixelFormat::D16_UNORM: 195 case PixelFormat::D16_UNORM:
196 return "D16_UNORM"; 196 return "D16_UNORM";
197 case PixelFormat::S8_UINT:
198 return "S8_UINT";
197 case PixelFormat::D24_UNORM_S8_UINT: 199 case PixelFormat::D24_UNORM_S8_UINT:
198 return "D24_UNORM_S8_UINT"; 200 return "D24_UNORM_S8_UINT";
199 case PixelFormat::S8_UINT_D24_UNORM: 201 case PixelFormat::S8_UINT_D24_UNORM:
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 241f71a91..5aaeb16ca 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -472,9 +472,10 @@ template <class P>
472void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 472void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
473 const Tegra::Engines::Fermi2D::Surface& src, 473 const Tegra::Engines::Fermi2D::Surface& src,
474 const Tegra::Engines::Fermi2D::Config& copy) { 474 const Tegra::Engines::Fermi2D::Config& copy) {
475 const BlitImages images = GetBlitImages(dst, src); 475 const BlitImages images = GetBlitImages(dst, src, copy);
476 const ImageId dst_id = images.dst_id; 476 const ImageId dst_id = images.dst_id;
477 const ImageId src_id = images.src_id; 477 const ImageId src_id = images.src_id;
478
478 PrepareImage(src_id, false, false); 479 PrepareImage(src_id, false, false);
479 PrepareImage(dst_id, true, false); 480 PrepareImage(dst_id, true, false);
480 481
@@ -758,14 +759,18 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
758 return ImageId{}; 759 return ImageId{};
759 } 760 }
760 } 761 }
761 const bool broken_views = runtime.HasBrokenTextureViewFormats(); 762 const bool broken_views =
763 runtime.HasBrokenTextureViewFormats() || True(options & RelaxedOptions::ForceBrokenViews);
762 const bool native_bgr = runtime.HasNativeBgr(); 764 const bool native_bgr = runtime.HasNativeBgr();
763 ImageId image_id; 765 const bool flexible_formats = True(options & RelaxedOptions::Format);
766 ImageId image_id{};
767 boost::container::small_vector<ImageId, 1> image_ids;
764 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { 768 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
765 if (True(existing_image.flags & ImageFlagBits::Remapped)) { 769 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
766 return false; 770 return false;
767 } 771 }
768 if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { 772 if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear)
773 [[unlikely]] {
769 const bool strict_size = False(options & RelaxedOptions::Size) && 774 const bool strict_size = False(options & RelaxedOptions::Size) &&
770 True(existing_image.flags & ImageFlagBits::Strong); 775 True(existing_image.flags & ImageFlagBits::Strong);
771 const ImageInfo& existing = existing_image.info; 776 const ImageInfo& existing = existing_image.info;
@@ -774,17 +779,27 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
774 IsPitchLinearSameSize(existing, info, strict_size) && 779 IsPitchLinearSameSize(existing, info, strict_size) &&
775 IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { 780 IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) {
776 image_id = existing_image_id; 781 image_id = existing_image_id;
777 return true; 782 image_ids.push_back(existing_image_id);
783 return !flexible_formats && existing.format == info.format;
778 } 784 }
779 } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, 785 } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views,
780 native_bgr)) { 786 native_bgr)) {
781 image_id = existing_image_id; 787 image_id = existing_image_id;
782 return true; 788 image_ids.push_back(existing_image_id);
789 return !flexible_formats && existing_image.info.format == info.format;
783 } 790 }
784 return false; 791 return false;
785 }; 792 };
786 ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); 793 ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
787 return image_id; 794 if (image_ids.size() <= 1) [[likely]] {
795 return image_id;
796 }
797 auto image_ids_compare = [this](ImageId a, ImageId b) {
798 auto& image_a = slot_images[a];
799 auto& image_b = slot_images[b];
800 return image_a.modification_tick < image_b.modification_tick;
801 };
802 return *std::ranges::max_element(image_ids, image_ids_compare);
788} 803}
789 804
790template <class P> 805template <class P>
@@ -1076,31 +1091,66 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1076 1091
1077template <class P> 1092template <class P>
1078typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( 1093typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
1079 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { 1094 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
1080 static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; 1095 const Tegra::Engines::Fermi2D::Config& copy) {
1096
1097 static constexpr auto FIND_OPTIONS = RelaxedOptions::Samples;
1081 const GPUVAddr dst_addr = dst.Address(); 1098 const GPUVAddr dst_addr = dst.Address();
1082 const GPUVAddr src_addr = src.Address(); 1099 const GPUVAddr src_addr = src.Address();
1083 ImageInfo dst_info(dst); 1100 ImageInfo dst_info(dst);
1084 ImageInfo src_info(src); 1101 ImageInfo src_info(src);
1102 const bool can_be_depth_blit =
1103 dst_info.format == src_info.format && copy.filter == Tegra::Engines::Fermi2D::Filter::Point;
1085 ImageId dst_id; 1104 ImageId dst_id;
1086 ImageId src_id; 1105 ImageId src_id;
1106 RelaxedOptions try_options = FIND_OPTIONS;
1107 if (can_be_depth_blit) {
1108 try_options |= RelaxedOptions::Format;
1109 }
1087 do { 1110 do {
1088 has_deleted_images = false; 1111 has_deleted_images = false;
1089 dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); 1112 src_id = FindImage(src_info, src_addr, try_options);
1090 src_id = FindImage(src_info, src_addr, FIND_OPTIONS); 1113 dst_id = FindImage(dst_info, dst_addr, try_options);
1091 const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
1092 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; 1114 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
1093 DeduceBlitImages(dst_info, src_info, dst_image, src_image); 1115 if (src_image && src_image->info.num_samples > 1) {
1094 if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { 1116 RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews};
1095 continue; 1117 src_id = FindOrInsertImage(src_info, src_addr, find_options);
1118 dst_id = FindOrInsertImage(dst_info, dst_addr, find_options);
1119 if (has_deleted_images) {
1120 continue;
1121 }
1122 break;
1096 } 1123 }
1097 if (!dst_id) { 1124 if (can_be_depth_blit) {
1098 dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); 1125 const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
1126 DeduceBlitImages(dst_info, src_info, dst_image, src_image);
1127 if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
1128 continue;
1129 }
1099 } 1130 }
1100 if (!src_id) { 1131 if (!src_id) {
1101 src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); 1132 src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
1102 } 1133 }
1134 if (!dst_id) {
1135 dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
1136 }
1103 } while (has_deleted_images); 1137 } while (has_deleted_images);
1138 const ImageBase& src_image = slot_images[src_id];
1139 const ImageBase& dst_image = slot_images[dst_id];
1140 const bool native_bgr = runtime.HasNativeBgr();
1141 if (GetFormatType(dst_info.format) != GetFormatType(dst_image.info.format) ||
1142 GetFormatType(src_info.format) != GetFormatType(src_image.info.format) ||
1143 !VideoCore::Surface::IsViewCompatible(dst_info.format, dst_image.info.format, false,
1144 native_bgr) ||
1145 !VideoCore::Surface::IsViewCompatible(src_info.format, src_image.info.format, false,
1146 native_bgr)) {
1147 // Make sure the images match the expected format.
1148 do {
1149 has_deleted_images = false;
1150 src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{});
1151 dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{});
1152 } while (has_deleted_images);
1153 }
1104 return BlitImages{ 1154 return BlitImages{
1105 .dst_id = dst_id, 1155 .dst_id = dst_id,
1106 .src_id = src_id, 1156 .src_id = src_id,
@@ -1157,7 +1207,14 @@ template <class P>
1157ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, 1207ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
1158 bool is_clear) { 1208 bool is_clear) {
1159 const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; 1209 const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
1160 const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); 1210 ImageId image_id{};
1211 bool delete_state = has_deleted_images;
1212 do {
1213 has_deleted_images = false;
1214 image_id = FindOrInsertImage(info, gpu_addr, options);
1215 delete_state |= has_deleted_images;
1216 } while (has_deleted_images);
1217 has_deleted_images = delete_state;
1161 if (!image_id) { 1218 if (!image_id) {
1162 return NULL_IMAGE_VIEW_ID; 1219 return NULL_IMAGE_VIEW_ID;
1163 } 1220 }
@@ -1759,8 +1816,8 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
1759 } 1816 }
1760 UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); 1817 UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
1761 UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); 1818 UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
1762 if constexpr (HAS_PIXEL_FORMAT_CONVERSIONS) { 1819 if (runtime.ShouldReinterpret(dst, src)) {
1763 return runtime.ConvertImage(dst, src, copies); 1820 return runtime.ReinterpretImage(dst, src, copies);
1764 } 1821 }
1765 for (const ImageCopy& copy : copies) { 1822 for (const ImageCopy& copy : copies) {
1766 UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); 1823 UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
@@ -1780,7 +1837,13 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
1780 const SubresourceExtent src_extent{.levels = 1, .layers = 1}; 1837 const SubresourceExtent src_extent{.levels = 1, .layers = 1};
1781 const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; 1838 const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
1782 const SubresourceRange src_range{.base = src_base, .extent = src_extent}; 1839 const SubresourceRange src_range{.base = src_base, .extent = src_extent};
1783 const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); 1840 PixelFormat dst_format = dst.info.format;
1841 if (GetFormatType(src.info.format) == SurfaceType::DepthStencil &&
1842 GetFormatType(dst_format) == SurfaceType::ColorTexture &&
1843 BytesPerBlock(dst_format) == 4) {
1844 dst_format = PixelFormat::A8B8G8R8_UNORM;
1845 }
1846 const ImageViewInfo dst_view_info(ImageViewType::e2D, dst_format, dst_range);
1784 const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); 1847 const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
1785 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); 1848 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
1786 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; 1849 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index a9504c0e8..7107887a6 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -59,8 +59,6 @@ class TextureCache {
59 static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; 59 static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
60 /// True when the API can provide info about the memory of the device. 60 /// True when the API can provide info about the memory of the device.
61 static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; 61 static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
62 /// True when the API provides utilities for pixel format conversions.
63 static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = P::HAS_PIXEL_FORMAT_CONVERSIONS;
64 62
65 static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; 63 static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
66 static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; 64 static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
@@ -254,7 +252,8 @@ private:
254 252
255 /// Return a blit image pair from the given guest blit parameters 253 /// Return a blit image pair from the given guest blit parameters
256 [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, 254 [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
257 const Tegra::Engines::Fermi2D::Surface& src); 255 const Tegra::Engines::Fermi2D::Surface& src,
256 const Tegra::Engines::Fermi2D::Config& copy);
258 257
259 /// Find or create a sampler from a guest descriptor sampler 258 /// Find or create a sampler from a guest descriptor sampler
260 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); 259 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 5c274abdf..5ac27b3a7 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -54,6 +54,7 @@ enum class RelaxedOptions : u32 {
54 Size = 1 << 0, 54 Size = 1 << 0,
55 Format = 1 << 1, 55 Format = 1 << 1,
56 Samples = 1 << 2, 56 Samples = 1 << 2,
57 ForceBrokenViews = 1 << 3,
57}; 58};
58DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) 59DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions)
59 60
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index ddc9fb13a..7bd31b211 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -1151,6 +1151,7 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr
1151 1151
1152void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, 1152void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
1153 const ImageBase* src) { 1153 const ImageBase* src) {
1154 const auto original_dst_format = dst_info.format;
1154 if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { 1155 if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
1155 src_info.format = src->info.format; 1156 src_info.format = src->info.format;
1156 } 1157 }
@@ -1161,7 +1162,13 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase*
1161 dst_info.format = src->info.format; 1162 dst_info.format = src->info.format;
1162 } 1163 }
1163 if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { 1164 if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
1164 src_info.format = dst->info.format; 1165 if (src) {
1166 if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) {
1167 dst_info.format = original_dst_format;
1168 }
1169 } else {
1170 src_info.format = dst->info.format;
1171 }
1165 } 1172 }
1166} 1173}
1167 1174
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index e852c817e..329bf4def 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -55,10 +55,4 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
55 } 55 }
56} 56}
57 57
58float GetResolutionScaleFactor(const RendererBase& renderer) {
59 return Settings::values.resolution_info.active
60 ? Settings::values.resolution_info.up_factor
61 : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio();
62}
63
64} // namespace VideoCore 58} // namespace VideoCore
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index f86877e86..084df641f 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -25,6 +25,4 @@ class RendererBase;
25/// Creates an emulated GPU instance using the given system context. 25/// Creates an emulated GPU instance using the given system context.
26std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); 26std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system);
27 27
28float GetResolutionScaleFactor(const RendererBase& renderer);
29
30} // namespace VideoCore 28} // namespace VideoCore
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 95106f88f..7bf5b6578 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -21,6 +21,13 @@
21namespace Vulkan { 21namespace Vulkan {
22namespace { 22namespace {
23namespace Alternatives { 23namespace Alternatives {
24constexpr std::array STENCIL8_UINT{
25 VK_FORMAT_D16_UNORM_S8_UINT,
26 VK_FORMAT_D24_UNORM_S8_UINT,
27 VK_FORMAT_D32_SFLOAT_S8_UINT,
28 VK_FORMAT_UNDEFINED,
29};
30
24constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{ 31constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{
25 VK_FORMAT_D32_SFLOAT_S8_UINT, 32 VK_FORMAT_D32_SFLOAT_S8_UINT,
26 VK_FORMAT_D16_UNORM_S8_UINT, 33 VK_FORMAT_D16_UNORM_S8_UINT,
@@ -74,6 +81,8 @@ void SetNext(void**& next, T& data) {
74 81
75constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { 82constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
76 switch (format) { 83 switch (format) {
84 case VK_FORMAT_S8_UINT:
85 return Alternatives::STENCIL8_UINT.data();
77 case VK_FORMAT_D24_UNORM_S8_UINT: 86 case VK_FORMAT_D24_UNORM_S8_UINT:
78 return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data(); 87 return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data();
79 case VK_FORMAT_D16_UNORM_S8_UINT: 88 case VK_FORMAT_D16_UNORM_S8_UINT:
@@ -121,6 +130,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
121 VK_FORMAT_R16G16_UNORM, 130 VK_FORMAT_R16G16_UNORM,
122 VK_FORMAT_R16G16_SNORM, 131 VK_FORMAT_R16G16_SNORM,
123 VK_FORMAT_R16G16_SFLOAT, 132 VK_FORMAT_R16G16_SFLOAT,
133 VK_FORMAT_R16G16_UINT,
124 VK_FORMAT_R16G16_SINT, 134 VK_FORMAT_R16G16_SINT,
125 VK_FORMAT_R16_UNORM, 135 VK_FORMAT_R16_UNORM,
126 VK_FORMAT_R16_SNORM, 136 VK_FORMAT_R16_SNORM,
@@ -145,6 +155,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
145 VK_FORMAT_R4G4B4A4_UNORM_PACK16, 155 VK_FORMAT_R4G4B4A4_UNORM_PACK16,
146 VK_FORMAT_D32_SFLOAT, 156 VK_FORMAT_D32_SFLOAT,
147 VK_FORMAT_D16_UNORM, 157 VK_FORMAT_D16_UNORM,
158 VK_FORMAT_S8_UINT,
148 VK_FORMAT_D16_UNORM_S8_UINT, 159 VK_FORMAT_D16_UNORM_S8_UINT,
149 VK_FORMAT_D24_UNORM_S8_UINT, 160 VK_FORMAT_D24_UNORM_S8_UINT,
150 VK_FORMAT_D32_SFLOAT_S8_UINT, 161 VK_FORMAT_D32_SFLOAT_S8_UINT,