diff options
| author | 2021-12-18 13:57:14 +0800 | |
|---|---|---|
| committer | 2021-12-18 13:57:14 +0800 | |
| commit | e49184e6069a9d791d2df3c1958f5c4b1187e124 (patch) | |
| tree | b776caf722e0be0e680f67b0ad0842628162ef1c /src/video_core | |
| parent | Implement convert legacy to generic (diff) | |
| parent | Merge pull request #7570 from ameerj/favorites-expanded (diff) | |
| download | yuzu-e49184e6069a9d791d2df3c1958f5c4b1187e124.tar.gz yuzu-e49184e6069a9d791d2df3c1958f5c4b1187e124.tar.xz yuzu-e49184e6069a9d791d2df3c1958f5c4b1187e124.zip | |
Merge branch 'yuzu-emu:master' into convert_legacy
Diffstat (limited to 'src/video_core')
28 files changed, 698 insertions, 130 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 91a30fef7..6a6325e38 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -3,6 +3,7 @@ add_subdirectory(host_shaders) | |||
| 3 | if(LIBVA_FOUND) | 3 | if(LIBVA_FOUND) |
| 4 | set_source_files_properties(command_classes/codecs/codec.cpp | 4 | set_source_files_properties(command_classes/codecs/codec.cpp |
| 5 | PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) | 5 | PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) |
| 6 | list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) | ||
| 6 | endif() | 7 | endif() |
| 7 | 8 | ||
| 8 | add_library(video_core STATIC | 9 | add_library(video_core STATIC |
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 916277811..2a532b883 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 6 | #include <cstdio> | ||
| 5 | #include <fstream> | 7 | #include <fstream> |
| 6 | #include <vector> | 8 | #include <vector> |
| 7 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| @@ -15,12 +17,28 @@ | |||
| 15 | 17 | ||
| 16 | extern "C" { | 18 | extern "C" { |
| 17 | #include <libavutil/opt.h> | 19 | #include <libavutil/opt.h> |
| 20 | #ifdef LIBVA_FOUND | ||
| 21 | // for querying VAAPI driver information | ||
| 22 | #include <libavutil/hwcontext_vaapi.h> | ||
| 23 | #endif | ||
| 18 | } | 24 | } |
| 19 | 25 | ||
| 20 | namespace Tegra { | 26 | namespace Tegra { |
| 21 | namespace { | 27 | namespace { |
| 22 | constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; | 28 | constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; |
| 23 | constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; | 29 | constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; |
| 30 | constexpr std::array PREFERRED_GPU_DECODERS = { | ||
| 31 | AV_HWDEVICE_TYPE_CUDA, | ||
| 32 | #ifdef _WIN32 | ||
| 33 | AV_HWDEVICE_TYPE_D3D11VA, | ||
| 34 | AV_HWDEVICE_TYPE_DXVA2, | ||
| 35 | #elif defined(__linux__) | ||
| 36 | AV_HWDEVICE_TYPE_VAAPI, | ||
| 37 | AV_HWDEVICE_TYPE_VDPAU, | ||
| 38 | #endif | ||
| 39 | // last resort for Linux Flatpak (w/ NVIDIA) | ||
| 40 | AV_HWDEVICE_TYPE_VULKAN, | ||
| 41 | }; | ||
| 24 | 42 | ||
| 25 | void AVPacketDeleter(AVPacket* ptr) { | 43 | void AVPacketDeleter(AVPacket* ptr) { |
| 26 | av_packet_free(&ptr); | 44 | av_packet_free(&ptr); |
| @@ -59,46 +77,50 @@ Codec::~Codec() { | |||
| 59 | av_buffer_unref(&av_gpu_decoder); | 77 | av_buffer_unref(&av_gpu_decoder); |
| 60 | } | 78 | } |
| 61 | 79 | ||
| 80 | // List all the currently available hwcontext in ffmpeg | ||
| 81 | static std::vector<AVHWDeviceType> ListSupportedContexts() { | ||
| 82 | std::vector<AVHWDeviceType> contexts{}; | ||
| 83 | AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; | ||
| 84 | do { | ||
| 85 | current_device_type = av_hwdevice_iterate_types(current_device_type); | ||
| 86 | contexts.push_back(current_device_type); | ||
| 87 | } while (current_device_type != AV_HWDEVICE_TYPE_NONE); | ||
| 88 | return contexts; | ||
| 89 | } | ||
| 90 | |||
| 62 | bool Codec::CreateGpuAvDevice() { | 91 | bool Codec::CreateGpuAvDevice() { |
| 63 | #if defined(LIBVA_FOUND) | ||
| 64 | static constexpr std::array<const char*, 3> VAAPI_DRIVERS = { | ||
| 65 | "i915", | ||
| 66 | "iHD", | ||
| 67 | "amdgpu", | ||
| 68 | }; | ||
| 69 | AVDictionary* hwdevice_options = nullptr; | ||
| 70 | av_dict_set(&hwdevice_options, "connection_type", "drm", 0); | ||
| 71 | for (const auto& driver : VAAPI_DRIVERS) { | ||
| 72 | av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); | ||
| 73 | const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI, | ||
| 74 | nullptr, hwdevice_options, 0); | ||
| 75 | if (hwdevice_error >= 0) { | ||
| 76 | LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver); | ||
| 77 | av_dict_free(&hwdevice_options); | ||
| 78 | av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI; | ||
| 79 | return true; | ||
| 80 | } | ||
| 81 | LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error); | ||
| 82 | } | ||
| 83 | LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers"); | ||
| 84 | av_dict_free(&hwdevice_options); | ||
| 85 | #endif | ||
| 86 | static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; | 92 | static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; |
| 87 | static constexpr std::array GPU_DECODER_TYPES{ | 93 | static const auto supported_contexts = ListSupportedContexts(); |
| 88 | AV_HWDEVICE_TYPE_CUDA, | 94 | for (const auto& type : PREFERRED_GPU_DECODERS) { |
| 89 | #ifdef _WIN32 | 95 | if (std::none_of(supported_contexts.begin(), supported_contexts.end(), |
| 90 | AV_HWDEVICE_TYPE_D3D11VA, | 96 | [&type](const auto& context) { return context == type; })) { |
| 91 | #else | 97 | LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type)); |
| 92 | AV_HWDEVICE_TYPE_VDPAU, | 98 | continue; |
| 93 | #endif | 99 | } |
| 94 | }; | ||
| 95 | for (const auto& type : GPU_DECODER_TYPES) { | ||
| 96 | const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); | 100 | const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); |
| 97 | if (hwdevice_res < 0) { | 101 | if (hwdevice_res < 0) { |
| 98 | LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", | 102 | LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", |
| 99 | av_hwdevice_get_type_name(type), hwdevice_res); | 103 | av_hwdevice_get_type_name(type), hwdevice_res); |
| 100 | continue; | 104 | continue; |
| 101 | } | 105 | } |
| 106 | #ifdef LIBVA_FOUND | ||
| 107 | if (type == AV_HWDEVICE_TYPE_VAAPI) { | ||
| 108 | // we need to determine if this is an impersonated VAAPI driver | ||
| 109 | AVHWDeviceContext* hwctx = | ||
| 110 | static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data)); | ||
| 111 | AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx); | ||
| 112 | const char* vendor_name = vaQueryVendorString(vactx->display); | ||
| 113 | if (strstr(vendor_name, "VDPAU backend")) { | ||
| 114 | // VDPAU impersonated VAAPI impl's are super buggy, we need to skip them | ||
| 115 | LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver"); | ||
| 116 | continue; | ||
| 117 | } else { | ||
| 118 | // according to some user testing, certain vaapi driver (Intel?) could be buggy | ||
| 119 | // so let's log the driver name which may help the developers/supporters | ||
| 120 | LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name); | ||
| 121 | } | ||
| 122 | } | ||
| 123 | #endif | ||
| 102 | for (int i = 0;; i++) { | 124 | for (int i = 0;; i++) { |
| 103 | const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); | 125 | const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); |
| 104 | if (!config) { | 126 | if (!config) { |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ab7c21a49..8788f5148 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -185,16 +185,6 @@ struct GPU::Impl { | |||
| 185 | return *dma_pusher; | 185 | return *dma_pusher; |
| 186 | } | 186 | } |
| 187 | 187 | ||
| 188 | /// Returns a reference to the GPU CDMA pusher. | ||
| 189 | [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() { | ||
| 190 | return *cdma_pusher; | ||
| 191 | } | ||
| 192 | |||
| 193 | /// Returns a const reference to the GPU CDMA pusher. | ||
| 194 | [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const { | ||
| 195 | return *cdma_pusher; | ||
| 196 | } | ||
| 197 | |||
| 198 | /// Returns a reference to the underlying renderer. | 188 | /// Returns a reference to the underlying renderer. |
| 199 | [[nodiscard]] VideoCore::RendererBase& Renderer() { | 189 | [[nodiscard]] VideoCore::RendererBase& Renderer() { |
| 200 | return *renderer; | 190 | return *renderer; |
| @@ -338,25 +328,27 @@ struct GPU::Impl { | |||
| 338 | } | 328 | } |
| 339 | 329 | ||
| 340 | /// Push GPU command buffer entries to be processed | 330 | /// Push GPU command buffer entries to be processed |
| 341 | void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { | 331 | void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { |
| 342 | if (!use_nvdec) { | 332 | if (!use_nvdec) { |
| 343 | return; | 333 | return; |
| 344 | } | 334 | } |
| 345 | 335 | ||
| 346 | if (!cdma_pusher) { | 336 | if (!cdma_pushers.contains(id)) { |
| 347 | cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu); | 337 | cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(gpu)); |
| 348 | } | 338 | } |
| 349 | 339 | ||
| 350 | // SubmitCommandBuffer would make the nvdec operations async, this is not currently working | 340 | // SubmitCommandBuffer would make the nvdec operations async, this is not currently working |
| 351 | // TODO(ameerj): RE proper async nvdec operation | 341 | // TODO(ameerj): RE proper async nvdec operation |
| 352 | // gpu_thread.SubmitCommandBuffer(std::move(entries)); | 342 | // gpu_thread.SubmitCommandBuffer(std::move(entries)); |
| 353 | 343 | cdma_pushers[id]->ProcessEntries(std::move(entries)); | |
| 354 | cdma_pusher->ProcessEntries(std::move(entries)); | ||
| 355 | } | 344 | } |
| 356 | 345 | ||
| 357 | /// Frees the CDMAPusher instance to free up resources | 346 | /// Frees the CDMAPusher instance to free up resources |
| 358 | void ClearCdmaInstance() { | 347 | void ClearCdmaInstance(u32 id) { |
| 359 | cdma_pusher.reset(); | 348 | const auto iter = cdma_pushers.find(id); |
| 349 | if (iter != cdma_pushers.end()) { | ||
| 350 | cdma_pushers.erase(iter); | ||
| 351 | } | ||
| 360 | } | 352 | } |
| 361 | 353 | ||
| 362 | /// Swap buffers (render frame) | 354 | /// Swap buffers (render frame) |
| @@ -659,7 +651,7 @@ struct GPU::Impl { | |||
| 659 | Core::System& system; | 651 | Core::System& system; |
| 660 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | 652 | std::unique_ptr<Tegra::MemoryManager> memory_manager; |
| 661 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 653 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
| 662 | std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; | 654 | std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers; |
| 663 | std::unique_ptr<VideoCore::RendererBase> renderer; | 655 | std::unique_ptr<VideoCore::RendererBase> renderer; |
| 664 | VideoCore::RasterizerInterface* rasterizer = nullptr; | 656 | VideoCore::RasterizerInterface* rasterizer = nullptr; |
| 665 | const bool use_nvdec; | 657 | const bool use_nvdec; |
| @@ -811,14 +803,6 @@ const Tegra::DmaPusher& GPU::DmaPusher() const { | |||
| 811 | return impl->DmaPusher(); | 803 | return impl->DmaPusher(); |
| 812 | } | 804 | } |
| 813 | 805 | ||
| 814 | Tegra::CDmaPusher& GPU::CDmaPusher() { | ||
| 815 | return impl->CDmaPusher(); | ||
| 816 | } | ||
| 817 | |||
| 818 | const Tegra::CDmaPusher& GPU::CDmaPusher() const { | ||
| 819 | return impl->CDmaPusher(); | ||
| 820 | } | ||
| 821 | |||
| 822 | VideoCore::RendererBase& GPU::Renderer() { | 806 | VideoCore::RendererBase& GPU::Renderer() { |
| 823 | return impl->Renderer(); | 807 | return impl->Renderer(); |
| 824 | } | 808 | } |
| @@ -887,12 +871,12 @@ void GPU::PushGPUEntries(Tegra::CommandList&& entries) { | |||
| 887 | impl->PushGPUEntries(std::move(entries)); | 871 | impl->PushGPUEntries(std::move(entries)); |
| 888 | } | 872 | } |
| 889 | 873 | ||
| 890 | void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { | 874 | void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { |
| 891 | impl->PushCommandBuffer(entries); | 875 | impl->PushCommandBuffer(id, entries); |
| 892 | } | 876 | } |
| 893 | 877 | ||
| 894 | void GPU::ClearCdmaInstance() { | 878 | void GPU::ClearCdmaInstance(u32 id) { |
| 895 | impl->ClearCdmaInstance(); | 879 | impl->ClearCdmaInstance(id); |
| 896 | } | 880 | } |
| 897 | 881 | ||
| 898 | void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 882 | void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 05e5c94f3..500411176 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -83,6 +83,7 @@ enum class DepthFormat : u32 { | |||
| 83 | S8_UINT_Z24_UNORM = 0x14, | 83 | S8_UINT_Z24_UNORM = 0x14, |
| 84 | D24X8_UNORM = 0x15, | 84 | D24X8_UNORM = 0x15, |
| 85 | D24S8_UNORM = 0x16, | 85 | D24S8_UNORM = 0x16, |
| 86 | S8_UINT = 0x17, | ||
| 86 | D24C8_UNORM = 0x18, | 87 | D24C8_UNORM = 0x18, |
| 87 | D32_FLOAT_S8X24_UINT = 0x19, | 88 | D32_FLOAT_S8X24_UINT = 0x19, |
| 88 | }; | 89 | }; |
| @@ -241,10 +242,10 @@ public: | |||
| 241 | void PushGPUEntries(Tegra::CommandList&& entries); | 242 | void PushGPUEntries(Tegra::CommandList&& entries); |
| 242 | 243 | ||
| 243 | /// Push GPU command buffer entries to be processed | 244 | /// Push GPU command buffer entries to be processed |
| 244 | void PushCommandBuffer(Tegra::ChCommandHeaderList& entries); | 245 | void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries); |
| 245 | 246 | ||
| 246 | /// Frees the CDMAPusher instance to free up resources | 247 | /// Frees the CDMAPusher instance to free up resources |
| 247 | void ClearCdmaInstance(); | 248 | void ClearCdmaInstance(u32 id); |
| 248 | 249 | ||
| 249 | /// Swap buffers (render frame) | 250 | /// Swap buffers (render frame) |
| 250 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); | 251 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index d779a967a..fd3e41434 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -10,6 +10,8 @@ set(SHADER_FILES | |||
| 10 | astc_decoder.comp | 10 | astc_decoder.comp |
| 11 | block_linear_unswizzle_2d.comp | 11 | block_linear_unswizzle_2d.comp |
| 12 | block_linear_unswizzle_3d.comp | 12 | block_linear_unswizzle_3d.comp |
| 13 | convert_abgr8_to_d24s8.frag | ||
| 14 | convert_d24s8_to_abgr8.frag | ||
| 13 | convert_depth_to_float.frag | 15 | convert_depth_to_float.frag |
| 14 | convert_float_to_depth.frag | 16 | convert_float_to_depth.frag |
| 15 | full_screen_triangle.vert | 17 | full_screen_triangle.vert |
diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag new file mode 100644 index 000000000..ea055ddad --- /dev/null +++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | #extension GL_ARB_shader_stencil_export : require | ||
| 7 | |||
| 8 | layout(binding = 0) uniform sampler2D color_texture; | ||
| 9 | |||
| 10 | void main() { | ||
| 11 | ivec2 coord = ivec2(gl_FragCoord.xy); | ||
| 12 | uvec4 color = uvec4(texelFetch(color_texture, coord, 0).abgr * (exp2(8) - 1.0f)); | ||
| 13 | uvec4 bytes = color << uvec4(24, 16, 8, 0); | ||
| 14 | uint depth_stencil_unorm = bytes.x | bytes.y | bytes.z | bytes.w; | ||
| 15 | |||
| 16 | gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f); | ||
| 17 | gl_FragStencilRefARB = int(depth_stencil_unorm >> 24); | ||
| 18 | } | ||
diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag new file mode 100644 index 000000000..94368fb59 --- /dev/null +++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | |||
| 7 | layout(binding = 0) uniform sampler2D depth_tex; | ||
| 8 | layout(binding = 1) uniform isampler2D stencil_tex; | ||
| 9 | |||
| 10 | layout(location = 0) out vec4 color; | ||
| 11 | |||
| 12 | void main() { | ||
| 13 | ivec2 coord = ivec2(gl_FragCoord.xy); | ||
| 14 | uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); | ||
| 15 | uint stencil = uint(textureLod(stencil_tex, coord, 0).r); | ||
| 16 | |||
| 17 | highp uint depth_val = | ||
| 18 | uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0)); | ||
| 19 | lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; | ||
| 20 | highp uvec4 components = | ||
| 21 | uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); | ||
| 22 | color.abgr = vec4(components) / (exp2(8.0) - 1.0); | ||
| 23 | } | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 6956535e5..14e6522f2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 11 | 11 | ||
| 12 | #include "common/bit_util.h" | ||
| 12 | #include "common/literals.h" | 13 | #include "common/literals.h" |
| 13 | #include "common/settings.h" | 14 | #include "common/settings.h" |
| 14 | #include "video_core/renderer_opengl/gl_device.h" | 15 | #include "video_core/renderer_opengl/gl_device.h" |
| @@ -148,6 +149,8 @@ GLenum AttachmentType(PixelFormat format) { | |||
| 148 | switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { | 149 | switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { |
| 149 | case SurfaceType::Depth: | 150 | case SurfaceType::Depth: |
| 150 | return GL_DEPTH_ATTACHMENT; | 151 | return GL_DEPTH_ATTACHMENT; |
| 152 | case SurfaceType::Stencil: | ||
| 153 | return GL_STENCIL_ATTACHMENT; | ||
| 151 | case SurfaceType::DepthStencil: | 154 | case SurfaceType::DepthStencil: |
| 152 | return GL_DEPTH_STENCIL_ATTACHMENT; | 155 | return GL_DEPTH_STENCIL_ATTACHMENT; |
| 153 | default: | 156 | default: |
| @@ -317,13 +320,12 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { | |||
| 317 | } | 320 | } |
| 318 | } | 321 | } |
| 319 | 322 | ||
| 320 | OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) { | 323 | OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format, |
| 324 | GLsizei gl_num_levels) { | ||
| 321 | const GLenum target = ImageTarget(info); | 325 | const GLenum target = ImageTarget(info); |
| 322 | const GLsizei width = info.size.width; | 326 | const GLsizei width = info.size.width; |
| 323 | const GLsizei height = info.size.height; | 327 | const GLsizei height = info.size.height; |
| 324 | const GLsizei depth = info.size.depth; | 328 | const GLsizei depth = info.size.depth; |
| 325 | const int max_host_mip_levels = std::bit_width(info.size.width); | ||
| 326 | const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); | ||
| 327 | const GLsizei num_layers = info.resources.layers; | 329 | const GLsizei num_layers = info.resources.layers; |
| 328 | const GLsizei num_samples = info.num_samples; | 330 | const GLsizei num_samples = info.num_samples; |
| 329 | 331 | ||
| @@ -335,10 +337,10 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form | |||
| 335 | } | 337 | } |
| 336 | switch (target) { | 338 | switch (target) { |
| 337 | case GL_TEXTURE_1D_ARRAY: | 339 | case GL_TEXTURE_1D_ARRAY: |
| 338 | glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); | 340 | glTextureStorage2D(handle, gl_num_levels, gl_internal_format, width, num_layers); |
| 339 | break; | 341 | break; |
| 340 | case GL_TEXTURE_2D_ARRAY: | 342 | case GL_TEXTURE_2D_ARRAY: |
| 341 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); | 343 | glTextureStorage3D(handle, gl_num_levels, gl_internal_format, width, height, num_layers); |
| 342 | break; | 344 | break; |
| 343 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { | 345 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { |
| 344 | // TODO: Where should 'fixedsamplelocations' come from? | 346 | // TODO: Where should 'fixedsamplelocations' come from? |
| @@ -348,10 +350,10 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form | |||
| 348 | break; | 350 | break; |
| 349 | } | 351 | } |
| 350 | case GL_TEXTURE_RECTANGLE: | 352 | case GL_TEXTURE_RECTANGLE: |
| 351 | glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); | 353 | glTextureStorage2D(handle, gl_num_levels, gl_internal_format, width, height); |
| 352 | break; | 354 | break; |
| 353 | case GL_TEXTURE_3D: | 355 | case GL_TEXTURE_3D: |
| 354 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); | 356 | glTextureStorage3D(handle, gl_num_levels, gl_internal_format, width, height, depth); |
| 355 | break; | 357 | break; |
| 356 | case GL_TEXTURE_BUFFER: | 358 | case GL_TEXTURE_BUFFER: |
| 357 | UNREACHABLE(); | 359 | UNREACHABLE(); |
| @@ -397,9 +399,6 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form | |||
| 397 | return GL_R32UI; | 399 | return GL_R32UI; |
| 398 | } | 400 | } |
| 399 | 401 | ||
| 400 | [[nodiscard]] u32 NextPow2(u32 value) { | ||
| 401 | return 1U << (32U - std::countl_zero(value - 1U)); | ||
| 402 | } | ||
| 403 | } // Anonymous namespace | 402 | } // Anonymous namespace |
| 404 | 403 | ||
| 405 | ImageBufferMap::~ImageBufferMap() { | 404 | ImageBufferMap::~ImageBufferMap() { |
| @@ -526,8 +525,8 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, | |||
| 526 | } | 525 | } |
| 527 | } | 526 | } |
| 528 | 527 | ||
| 529 | void TextureCacheRuntime::ConvertImage(Image& dst, Image& src, | 528 | void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, |
| 530 | std::span<const VideoCommon::ImageCopy> copies) { | 529 | std::span<const VideoCommon::ImageCopy> copies) { |
| 531 | LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); | 530 | LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); |
| 532 | format_conversion_pass.ConvertImage(dst, src, copies); | 531 | format_conversion_pass.ConvertImage(dst, src, copies); |
| 533 | } | 532 | } |
| @@ -696,7 +695,9 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, | |||
| 696 | gl_format = tuple.format; | 695 | gl_format = tuple.format; |
| 697 | gl_type = tuple.type; | 696 | gl_type = tuple.type; |
| 698 | } | 697 | } |
| 699 | texture = MakeImage(info, gl_internal_format); | 698 | const int max_host_mip_levels = std::bit_width(info.size.width); |
| 699 | gl_num_levels = std::min(info.resources.levels, max_host_mip_levels); | ||
| 700 | texture = MakeImage(info, gl_internal_format, gl_num_levels); | ||
| 700 | current_texture = texture.handle; | 701 | current_texture = texture.handle; |
| 701 | if (runtime->device.HasDebuggingToolAttached()) { | 702 | if (runtime->device.HasDebuggingToolAttached()) { |
| 702 | const std::string name = VideoCommon::Name(*this); | 703 | const std::string name = VideoCommon::Name(*this); |
| @@ -724,6 +725,9 @@ void Image::UploadMemory(const ImageBufferMap& map, | |||
| 724 | u32 current_image_height = std::numeric_limits<u32>::max(); | 725 | u32 current_image_height = std::numeric_limits<u32>::max(); |
| 725 | 726 | ||
| 726 | for (const VideoCommon::BufferImageCopy& copy : copies) { | 727 | for (const VideoCommon::BufferImageCopy& copy : copies) { |
| 728 | if (copy.image_subresource.base_level >= gl_num_levels) { | ||
| 729 | continue; | ||
| 730 | } | ||
| 727 | if (current_row_length != copy.buffer_row_length) { | 731 | if (current_row_length != copy.buffer_row_length) { |
| 728 | current_row_length = copy.buffer_row_length; | 732 | current_row_length = copy.buffer_row_length; |
| 729 | glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); | 733 | glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); |
| @@ -753,6 +757,9 @@ void Image::DownloadMemory(ImageBufferMap& map, | |||
| 753 | u32 current_image_height = std::numeric_limits<u32>::max(); | 757 | u32 current_image_height = std::numeric_limits<u32>::max(); |
| 754 | 758 | ||
| 755 | for (const VideoCommon::BufferImageCopy& copy : copies) { | 759 | for (const VideoCommon::BufferImageCopy& copy : copies) { |
| 760 | if (copy.image_subresource.base_level >= gl_num_levels) { | ||
| 761 | continue; | ||
| 762 | } | ||
| 756 | if (current_row_length != copy.buffer_row_length) { | 763 | if (current_row_length != copy.buffer_row_length) { |
| 757 | current_row_length = copy.buffer_row_length; | 764 | current_row_length = copy.buffer_row_length; |
| 758 | glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); | 765 | glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); |
| @@ -792,7 +799,7 @@ GLuint Image::StorageHandle() noexcept { | |||
| 792 | } | 799 | } |
| 793 | store_view.Create(); | 800 | store_view.Create(); |
| 794 | glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0, | 801 | glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0, |
| 795 | info.resources.levels, 0, info.resources.layers); | 802 | gl_num_levels, 0, info.resources.layers); |
| 796 | return store_view.handle; | 803 | return store_view.handle; |
| 797 | default: | 804 | default: |
| 798 | return current_texture; | 805 | return current_texture; |
| @@ -907,6 +914,8 @@ void Image::Scale(bool up_scale) { | |||
| 907 | return GL_COLOR_ATTACHMENT0; | 914 | return GL_COLOR_ATTACHMENT0; |
| 908 | case SurfaceType::Depth: | 915 | case SurfaceType::Depth: |
| 909 | return GL_DEPTH_ATTACHMENT; | 916 | return GL_DEPTH_ATTACHMENT; |
| 917 | case SurfaceType::Stencil: | ||
| 918 | return GL_STENCIL_ATTACHMENT; | ||
| 910 | case SurfaceType::DepthStencil: | 919 | case SurfaceType::DepthStencil: |
| 911 | return GL_DEPTH_STENCIL_ATTACHMENT; | 920 | return GL_DEPTH_STENCIL_ATTACHMENT; |
| 912 | default: | 921 | default: |
| @@ -920,8 +929,10 @@ void Image::Scale(bool up_scale) { | |||
| 920 | return GL_COLOR_BUFFER_BIT; | 929 | return GL_COLOR_BUFFER_BIT; |
| 921 | case SurfaceType::Depth: | 930 | case SurfaceType::Depth: |
| 922 | return GL_DEPTH_BUFFER_BIT; | 931 | return GL_DEPTH_BUFFER_BIT; |
| 932 | case SurfaceType::Stencil: | ||
| 933 | return GL_STENCIL_BUFFER_BIT; | ||
| 923 | case SurfaceType::DepthStencil: | 934 | case SurfaceType::DepthStencil: |
| 924 | return GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT; | 935 | return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; |
| 925 | default: | 936 | default: |
| 926 | UNREACHABLE(); | 937 | UNREACHABLE(); |
| 927 | return GL_COLOR_BUFFER_BIT; | 938 | return GL_COLOR_BUFFER_BIT; |
| @@ -933,8 +944,10 @@ void Image::Scale(bool up_scale) { | |||
| 933 | return 0; | 944 | return 0; |
| 934 | case SurfaceType::Depth: | 945 | case SurfaceType::Depth: |
| 935 | return 1; | 946 | return 1; |
| 936 | case SurfaceType::DepthStencil: | 947 | case SurfaceType::Stencil: |
| 937 | return 2; | 948 | return 2; |
| 949 | case SurfaceType::DepthStencil: | ||
| 950 | return 3; | ||
| 938 | default: | 951 | default: |
| 939 | UNREACHABLE(); | 952 | UNREACHABLE(); |
| 940 | return 0; | 953 | return 0; |
| @@ -956,7 +969,7 @@ void Image::Scale(bool up_scale) { | |||
| 956 | auto dst_info = info; | 969 | auto dst_info = info; |
| 957 | dst_info.size.width = scaled_width; | 970 | dst_info.size.width = scaled_width; |
| 958 | dst_info.size.height = scaled_height; | 971 | dst_info.size.height = scaled_height; |
| 959 | upscaled_backup = MakeImage(dst_info, gl_internal_format); | 972 | upscaled_backup = MakeImage(dst_info, gl_internal_format, gl_num_levels); |
| 960 | } | 973 | } |
| 961 | const u32 src_width = up_scale ? original_width : scaled_width; | 974 | const u32 src_width = up_scale ? original_width : scaled_width; |
| 962 | const u32 src_height = up_scale ? original_height : scaled_height; | 975 | const u32 src_height = up_scale ? original_height : scaled_height; |
| @@ -1264,10 +1277,20 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1264 | } | 1277 | } |
| 1265 | 1278 | ||
| 1266 | if (const ImageView* const image_view = depth_buffer; image_view) { | 1279 | if (const ImageView* const image_view = depth_buffer; image_view) { |
| 1267 | if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) { | 1280 | switch (GetFormatType(image_view->format)) { |
| 1281 | case SurfaceType::Depth: | ||
| 1282 | buffer_bits |= GL_DEPTH_BUFFER_BIT; | ||
| 1283 | break; | ||
| 1284 | case SurfaceType::Stencil: | ||
| 1285 | buffer_bits |= GL_STENCIL_BUFFER_BIT; | ||
| 1286 | break; | ||
| 1287 | case SurfaceType::DepthStencil: | ||
| 1268 | buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | 1288 | buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; |
| 1269 | } else { | 1289 | break; |
| 1290 | default: | ||
| 1291 | UNREACHABLE(); | ||
| 1270 | buffer_bits |= GL_DEPTH_BUFFER_BIT; | 1292 | buffer_bits |= GL_DEPTH_BUFFER_BIT; |
| 1293 | break; | ||
| 1271 | } | 1294 | } |
| 1272 | const GLenum attachment = AttachmentType(image_view->format); | 1295 | const GLenum attachment = AttachmentType(image_view->format); |
| 1273 | AttachTexture(handle, attachment, image_view); | 1296 | AttachTexture(handle, attachment, image_view); |
| @@ -1308,7 +1331,7 @@ void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image, | |||
| 1308 | const u32 copy_size = region.width * region.height * region.depth * img_bpp; | 1331 | const u32 copy_size = region.width * region.height * region.depth * img_bpp; |
| 1309 | if (pbo_size < copy_size) { | 1332 | if (pbo_size < copy_size) { |
| 1310 | intermediate_pbo.Create(); | 1333 | intermediate_pbo.Create(); |
| 1311 | pbo_size = NextPow2(copy_size); | 1334 | pbo_size = Common::NextPow2(copy_size); |
| 1312 | glNamedBufferData(intermediate_pbo.handle, pbo_size, nullptr, GL_STREAM_COPY); | 1335 | glNamedBufferData(intermediate_pbo.handle, pbo_size, nullptr, GL_STREAM_COPY); |
| 1313 | } | 1336 | } |
| 1314 | // Copy from source to PBO | 1337 | // Copy from source to PBO |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 578f8d523..37d5e6a6b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -84,9 +84,13 @@ public: | |||
| 84 | 84 | ||
| 85 | u64 GetDeviceLocalMemory() const; | 85 | u64 GetDeviceLocalMemory() const; |
| 86 | 86 | ||
| 87 | bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { | ||
| 88 | return true; | ||
| 89 | } | ||
| 90 | |||
| 87 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | 91 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); |
| 88 | 92 | ||
| 89 | void ConvertImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | 93 | void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); |
| 90 | 94 | ||
| 91 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { | 95 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { |
| 92 | UNIMPLEMENTED(); | 96 | UNIMPLEMENTED(); |
| @@ -164,8 +168,8 @@ private: | |||
| 164 | 168 | ||
| 165 | std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; | 169 | std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; |
| 166 | 170 | ||
| 167 | std::array<OGLFramebuffer, 3> rescale_draw_fbos; | 171 | std::array<OGLFramebuffer, 4> rescale_draw_fbos; |
| 168 | std::array<OGLFramebuffer, 3> rescale_read_fbos; | 172 | std::array<OGLFramebuffer, 4> rescale_read_fbos; |
| 169 | const Settings::ResolutionScalingInfo& resolution; | 173 | const Settings::ResolutionScalingInfo& resolution; |
| 170 | }; | 174 | }; |
| 171 | 175 | ||
| @@ -221,6 +225,7 @@ private: | |||
| 221 | GLenum gl_internal_format = GL_NONE; | 225 | GLenum gl_internal_format = GL_NONE; |
| 222 | GLenum gl_format = GL_NONE; | 226 | GLenum gl_format = GL_NONE; |
| 223 | GLenum gl_type = GL_NONE; | 227 | GLenum gl_type = GL_NONE; |
| 228 | GLsizei gl_num_levels{}; | ||
| 224 | TextureCacheRuntime* runtime{}; | 229 | TextureCacheRuntime* runtime{}; |
| 225 | GLuint current_texture{}; | 230 | GLuint current_texture{}; |
| 226 | }; | 231 | }; |
| @@ -338,7 +343,6 @@ struct TextureCacheParams { | |||
| 338 | static constexpr bool FRAMEBUFFER_BLITS = true; | 343 | static constexpr bool FRAMEBUFFER_BLITS = true; |
| 339 | static constexpr bool HAS_EMULATED_COPIES = true; | 344 | static constexpr bool HAS_EMULATED_COPIES = true; |
| 340 | static constexpr bool HAS_DEVICE_MEMORY_INFO = true; | 345 | static constexpr bool HAS_DEVICE_MEMORY_INFO = true; |
| 341 | static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = true; | ||
| 342 | 346 | ||
| 343 | using Runtime = OpenGL::TextureCacheRuntime; | 347 | using Runtime = OpenGL::TextureCacheRuntime; |
| 344 | using Image = OpenGL::Image; | 348 | using Image = OpenGL::Image; |
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 39158aa3e..daba42ed9 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -108,6 +108,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB | |||
| 108 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT | 108 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT |
| 109 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT | 109 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT |
| 110 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM | 110 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM |
| 111 | {GL_STENCIL_INDEX8, GL_STENCIL, GL_UNSIGNED_BYTE}, // S8_UINT | ||
| 111 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT | 112 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT |
| 112 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM | 113 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM |
| 113 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, | 114 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, |
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b3884a4f5..9a38b6b34 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp | |||
| @@ -4,6 +4,8 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | 6 | ||
| 7 | #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" | ||
| 8 | #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" | ||
| 7 | #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" | 9 | #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" |
| 8 | #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" | 10 | #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" |
| 9 | #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" | 11 | #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" |
| @@ -354,6 +356,8 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, | |||
| 354 | blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), | 356 | blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), |
| 355 | convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), | 357 | convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), |
| 356 | convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), | 358 | convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), |
| 359 | convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), | ||
| 360 | convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), | ||
| 357 | linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)), | 361 | linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)), |
| 358 | nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) { | 362 | nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) { |
| 359 | if (device.IsExtShaderStencilExportSupported()) { | 363 | if (device.IsExtShaderStencilExportSupported()) { |
| @@ -448,6 +452,22 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, | |||
| 448 | Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); | 452 | Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); |
| 449 | } | 453 | } |
| 450 | 454 | ||
| 455 | void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, | ||
| 456 | ImageView& src_image_view, u32 up_scale, u32 down_shift) { | ||
| 457 | ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(), | ||
| 458 | convert_abgr8_to_d24s8_frag, true); | ||
| 459 | ConvertColor(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, | ||
| 460 | down_shift); | ||
| 461 | } | ||
| 462 | |||
| 463 | void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, | ||
| 464 | ImageView& src_image_view, u32 up_scale, u32 down_shift) { | ||
| 465 | ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), | ||
| 466 | convert_d24s8_to_abgr8_frag, false); | ||
| 467 | ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale, | ||
| 468 | down_shift); | ||
| 469 | } | ||
| 470 | |||
| 451 | void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | 471 | void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, |
| 452 | const ImageView& src_image_view, u32 up_scale, u32 down_shift) { | 472 | const ImageView& src_image_view, u32 up_scale, u32 down_shift) { |
| 453 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | 473 | const VkPipelineLayout layout = *one_texture_pipeline_layout; |
| @@ -495,6 +515,101 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb | |||
| 495 | scheduler.InvalidateState(); | 515 | scheduler.InvalidateState(); |
| 496 | } | 516 | } |
| 497 | 517 | ||
| 518 | void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | ||
| 519 | ImageView& src_image_view, u32 up_scale, u32 down_shift) { | ||
| 520 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | ||
| 521 | const VkImageView src_view = src_image_view.ColorView(); | ||
| 522 | const VkSampler sampler = *nearest_sampler; | ||
| 523 | const VkExtent2D extent{ | ||
| 524 | .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), | ||
| 525 | .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), | ||
| 526 | }; | ||
| 527 | scheduler.RequestRenderpass(dst_framebuffer); | ||
| 528 | scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift, | ||
| 529 | this](vk::CommandBuffer cmdbuf) { | ||
| 530 | const VkOffset2D offset{ | ||
| 531 | .x = 0, | ||
| 532 | .y = 0, | ||
| 533 | }; | ||
| 534 | const VkViewport viewport{ | ||
| 535 | .x = 0.0f, | ||
| 536 | .y = 0.0f, | ||
| 537 | .width = static_cast<float>(extent.width), | ||
| 538 | .height = static_cast<float>(extent.height), | ||
| 539 | .minDepth = 0.0f, | ||
| 540 | .maxDepth = 0.0f, | ||
| 541 | }; | ||
| 542 | const VkRect2D scissor{ | ||
| 543 | .offset = offset, | ||
| 544 | .extent = extent, | ||
| 545 | }; | ||
| 546 | const PushConstants push_constants{ | ||
| 547 | .tex_scale = {viewport.width, viewport.height}, | ||
| 548 | .tex_offset = {0.0f, 0.0f}, | ||
| 549 | }; | ||
| 550 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 551 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); | ||
| 552 | |||
| 553 | // TODO: Barriers | ||
| 554 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | ||
| 555 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, | ||
| 556 | nullptr); | ||
| 557 | cmdbuf.SetViewport(0, viewport); | ||
| 558 | cmdbuf.SetScissor(0, scissor); | ||
| 559 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); | ||
| 560 | cmdbuf.Draw(3, 1, 0, 0); | ||
| 561 | }); | ||
| 562 | scheduler.InvalidateState(); | ||
| 563 | } | ||
| 564 | |||
| 565 | void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | ||
| 566 | ImageView& src_image_view, u32 up_scale, u32 down_shift) { | ||
| 567 | const VkPipelineLayout layout = *two_textures_pipeline_layout; | ||
| 568 | const VkImageView src_depth_view = src_image_view.DepthView(); | ||
| 569 | const VkImageView src_stencil_view = src_image_view.StencilView(); | ||
| 570 | const VkSampler sampler = *nearest_sampler; | ||
| 571 | const VkExtent2D extent{ | ||
| 572 | .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), | ||
| 573 | .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), | ||
| 574 | }; | ||
| 575 | scheduler.RequestRenderpass(dst_framebuffer); | ||
| 576 | scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, up_scale, | ||
| 577 | down_shift, this](vk::CommandBuffer cmdbuf) { | ||
| 578 | const VkOffset2D offset{ | ||
| 579 | .x = 0, | ||
| 580 | .y = 0, | ||
| 581 | }; | ||
| 582 | const VkViewport viewport{ | ||
| 583 | .x = 0.0f, | ||
| 584 | .y = 0.0f, | ||
| 585 | .width = static_cast<float>(extent.width), | ||
| 586 | .height = static_cast<float>(extent.height), | ||
| 587 | .minDepth = 0.0f, | ||
| 588 | .maxDepth = 0.0f, | ||
| 589 | }; | ||
| 590 | const VkRect2D scissor{ | ||
| 591 | .offset = offset, | ||
| 592 | .extent = extent, | ||
| 593 | }; | ||
| 594 | const PushConstants push_constants{ | ||
| 595 | .tex_scale = {viewport.width, viewport.height}, | ||
| 596 | .tex_offset = {0.0f, 0.0f}, | ||
| 597 | }; | ||
| 598 | const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); | ||
| 599 | UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, | ||
| 600 | src_stencil_view); | ||
| 601 | // TODO: Barriers | ||
| 602 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | ||
| 603 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, | ||
| 604 | nullptr); | ||
| 605 | cmdbuf.SetViewport(0, viewport); | ||
| 606 | cmdbuf.SetScissor(0, scissor); | ||
| 607 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); | ||
| 608 | cmdbuf.Draw(3, 1, 0, 0); | ||
| 609 | }); | ||
| 610 | scheduler.InvalidateState(); | ||
| 611 | } | ||
| 612 | |||
| 498 | VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) { | 613 | VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) { |
| 499 | const auto it = std::ranges::find(blit_color_keys, key); | 614 | const auto it = std::ranges::find(blit_color_keys, key); |
| 500 | if (it != blit_color_keys.end()) { | 615 | if (it != blit_color_keys.end()) { |
| @@ -636,4 +751,44 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend | |||
| 636 | }); | 751 | }); |
| 637 | } | 752 | } |
| 638 | 753 | ||
| 754 | void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, | ||
| 755 | vk::ShaderModule& module, bool is_target_depth, | ||
| 756 | bool single_texture) { | ||
| 757 | if (pipeline) { | ||
| 758 | return; | ||
| 759 | } | ||
| 760 | const std::array stages = MakeStages(*full_screen_vert, *module); | ||
| 761 | pipeline = device.GetLogical().CreateGraphicsPipeline({ | ||
| 762 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 763 | .pNext = nullptr, | ||
| 764 | .flags = 0, | ||
| 765 | .stageCount = static_cast<u32>(stages.size()), | ||
| 766 | .pStages = stages.data(), | ||
| 767 | .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 768 | .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 769 | .pTessellationState = nullptr, | ||
| 770 | .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 771 | .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 772 | .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 773 | .pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr, | ||
| 774 | .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, | ||
| 775 | .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 776 | .layout = single_texture ? *one_texture_pipeline_layout : *two_textures_pipeline_layout, | ||
| 777 | .renderPass = renderpass, | ||
| 778 | .subpass = 0, | ||
| 779 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 780 | .basePipelineIndex = 0, | ||
| 781 | }); | ||
| 782 | } | ||
| 783 | |||
| 784 | void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, | ||
| 785 | vk::ShaderModule& module, bool single_texture) { | ||
| 786 | ConvertPipelineEx(pipeline, renderpass, module, false, single_texture); | ||
| 787 | } | ||
| 788 | |||
| 789 | void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, | ||
| 790 | vk::ShaderModule& module, bool single_texture) { | ||
| 791 | ConvertPipelineEx(pipeline, renderpass, module, true, single_texture); | ||
| 792 | } | ||
| 793 | |||
| 639 | } // namespace Vulkan | 794 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index d77f76678..b1a717090 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h | |||
| @@ -56,10 +56,22 @@ public: | |||
| 56 | void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, | 56 | void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, |
| 57 | u32 up_scale, u32 down_shift); | 57 | u32 up_scale, u32 down_shift); |
| 58 | 58 | ||
| 59 | void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, | ||
| 60 | u32 up_scale, u32 down_shift); | ||
| 61 | |||
| 62 | void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, | ||
| 63 | u32 up_scale, u32 down_shift); | ||
| 64 | |||
| 59 | private: | 65 | private: |
| 60 | void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | 66 | void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, |
| 61 | const ImageView& src_image_view, u32 up_scale, u32 down_shift); | 67 | const ImageView& src_image_view, u32 up_scale, u32 down_shift); |
| 62 | 68 | ||
| 69 | void ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | ||
| 70 | ImageView& src_image_view, u32 up_scale, u32 down_shift); | ||
| 71 | |||
| 72 | void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | ||
| 73 | ImageView& src_image_view, u32 up_scale, u32 down_shift); | ||
| 74 | |||
| 63 | [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key); | 75 | [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key); |
| 64 | 76 | ||
| 65 | [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); | 77 | [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); |
| @@ -68,6 +80,15 @@ private: | |||
| 68 | 80 | ||
| 69 | void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); | 81 | void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); |
| 70 | 82 | ||
| 83 | void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, | ||
| 84 | vk::ShaderModule& module, bool is_target_depth, bool single_texture); | ||
| 85 | |||
| 86 | void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, | ||
| 87 | vk::ShaderModule& module, bool single_texture); | ||
| 88 | |||
| 89 | void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, | ||
| 90 | vk::ShaderModule& module, bool single_texture); | ||
| 91 | |||
| 71 | const Device& device; | 92 | const Device& device; |
| 72 | VKScheduler& scheduler; | 93 | VKScheduler& scheduler; |
| 73 | StateTracker& state_tracker; | 94 | StateTracker& state_tracker; |
| @@ -83,6 +104,8 @@ private: | |||
| 83 | vk::ShaderModule blit_depth_stencil_frag; | 104 | vk::ShaderModule blit_depth_stencil_frag; |
| 84 | vk::ShaderModule convert_depth_to_float_frag; | 105 | vk::ShaderModule convert_depth_to_float_frag; |
| 85 | vk::ShaderModule convert_float_to_depth_frag; | 106 | vk::ShaderModule convert_float_to_depth_frag; |
| 107 | vk::ShaderModule convert_abgr8_to_d24s8_frag; | ||
| 108 | vk::ShaderModule convert_d24s8_to_abgr8_frag; | ||
| 86 | vk::Sampler linear_sampler; | 109 | vk::Sampler linear_sampler; |
| 87 | vk::Sampler nearest_sampler; | 110 | vk::Sampler nearest_sampler; |
| 88 | 111 | ||
| @@ -94,6 +117,8 @@ private: | |||
| 94 | vk::Pipeline convert_r32_to_d32_pipeline; | 117 | vk::Pipeline convert_r32_to_d32_pipeline; |
| 95 | vk::Pipeline convert_d16_to_r16_pipeline; | 118 | vk::Pipeline convert_d16_to_r16_pipeline; |
| 96 | vk::Pipeline convert_r16_to_d16_pipeline; | 119 | vk::Pipeline convert_r16_to_d16_pipeline; |
| 120 | vk::Pipeline convert_abgr8_to_d24s8_pipeline; | ||
| 121 | vk::Pipeline convert_d24s8_to_abgr8_pipeline; | ||
| 97 | }; | 122 | }; |
| 98 | 123 | ||
| 99 | } // namespace Vulkan | 124 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 68a23b602..751e4792b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -162,7 +162,7 @@ struct FormatTuple { | |||
| 162 | {VK_FORMAT_UNDEFINED}, // R16_SINT | 162 | {VK_FORMAT_UNDEFINED}, // R16_SINT |
| 163 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM | 163 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM |
| 164 | {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT | 164 | {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT |
| 165 | {VK_FORMAT_UNDEFINED}, // R16G16_UINT | 165 | {VK_FORMAT_R16G16_UINT, Attachable | Storage}, // R16G16_UINT |
| 166 | {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT | 166 | {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT |
| 167 | {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM | 167 | {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM |
| 168 | {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT | 168 | {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT |
| @@ -176,8 +176,8 @@ struct FormatTuple { | |||
| 176 | {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT | 176 | {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT |
| 177 | {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT | 177 | {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT |
| 178 | {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM | 178 | {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM |
| 179 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM | 179 | {VK_FORMAT_ASTC_8x5_UNORM_BLOCK}, // ASTC_2D_8X5_UNORM |
| 180 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM | 180 | {VK_FORMAT_ASTC_5x4_UNORM_BLOCK}, // ASTC_2D_5X4_UNORM |
| 181 | {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB | 181 | {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB |
| 182 | {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB | 182 | {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB |
| 183 | {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB | 183 | {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB |
| @@ -208,6 +208,9 @@ struct FormatTuple { | |||
| 208 | {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT | 208 | {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT |
| 209 | {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM | 209 | {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM |
| 210 | 210 | ||
| 211 | // Stencil formats | ||
| 212 | {VK_FORMAT_S8_UINT, Attachable}, // S8_UINT | ||
| 213 | |||
| 211 | // DepthStencil formats | 214 | // DepthStencil formats |
| 212 | {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT | 215 | {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT |
| 213 | {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated) | 216 | {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated) |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 407fd2a15..197cba8e3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "common/bit_cast.h" | 10 | #include "common/bit_cast.h" |
| 11 | #include "common/bit_util.h" | ||
| 11 | #include "common/settings.h" | 12 | #include "common/settings.h" |
| 12 | 13 | ||
| 13 | #include "video_core/engines/fermi_2d.h" | 14 | #include "video_core/engines/fermi_2d.h" |
| @@ -102,6 +103,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 102 | usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; | 103 | usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; |
| 103 | break; | 104 | break; |
| 104 | case VideoCore::Surface::SurfaceType::Depth: | 105 | case VideoCore::Surface::SurfaceType::Depth: |
| 106 | case VideoCore::Surface::SurfaceType::Stencil: | ||
| 105 | case VideoCore::Surface::SurfaceType::DepthStencil: | 107 | case VideoCore::Surface::SurfaceType::DepthStencil: |
| 106 | usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; | 108 | usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; |
| 107 | break; | 109 | break; |
| @@ -173,6 +175,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 173 | return VK_IMAGE_ASPECT_COLOR_BIT; | 175 | return VK_IMAGE_ASPECT_COLOR_BIT; |
| 174 | case VideoCore::Surface::SurfaceType::Depth: | 176 | case VideoCore::Surface::SurfaceType::Depth: |
| 175 | return VK_IMAGE_ASPECT_DEPTH_BIT; | 177 | return VK_IMAGE_ASPECT_DEPTH_BIT; |
| 178 | case VideoCore::Surface::SurfaceType::Stencil: | ||
| 179 | return VK_IMAGE_ASPECT_STENCIL_BIT; | ||
| 176 | case VideoCore::Surface::SurfaceType::DepthStencil: | 180 | case VideoCore::Surface::SurfaceType::DepthStencil: |
| 177 | return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; | 181 | return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; |
| 178 | default: | 182 | default: |
| @@ -195,6 +199,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 195 | case PixelFormat::D16_UNORM: | 199 | case PixelFormat::D16_UNORM: |
| 196 | case PixelFormat::D32_FLOAT: | 200 | case PixelFormat::D32_FLOAT: |
| 197 | return VK_IMAGE_ASPECT_DEPTH_BIT; | 201 | return VK_IMAGE_ASPECT_DEPTH_BIT; |
| 202 | case PixelFormat::S8_UINT: | ||
| 203 | return VK_IMAGE_ASPECT_STENCIL_BIT; | ||
| 198 | default: | 204 | default: |
| 199 | return VK_IMAGE_ASPECT_COLOR_BIT; | 205 | return VK_IMAGE_ASPECT_COLOR_BIT; |
| 200 | } | 206 | } |
| @@ -308,6 +314,19 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 308 | }; | 314 | }; |
| 309 | } | 315 | } |
| 310 | 316 | ||
| 317 | [[nodiscard]] VkBufferImageCopy MakeBufferImageCopy(const VideoCommon::ImageCopy& copy, bool is_src, | ||
| 318 | VkImageAspectFlags aspect_mask) noexcept { | ||
| 319 | return VkBufferImageCopy{ | ||
| 320 | .bufferOffset = 0, | ||
| 321 | .bufferRowLength = 0, | ||
| 322 | .bufferImageHeight = 0, | ||
| 323 | .imageSubresource = MakeImageSubresourceLayers( | ||
| 324 | is_src ? copy.src_subresource : copy.dst_subresource, aspect_mask), | ||
| 325 | .imageOffset = MakeOffset3D(is_src ? copy.src_offset : copy.dst_offset), | ||
| 326 | .imageExtent = MakeExtent3D(copy.extent), | ||
| 327 | }; | ||
| 328 | } | ||
| 329 | |||
| 311 | [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( | 330 | [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( |
| 312 | std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { | 331 | std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { |
| 313 | std::vector<VkBufferCopy> result(copies.size()); | 332 | std::vector<VkBufferCopy> result(copies.size()); |
| @@ -754,6 +773,173 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { | |||
| 754 | return staging_buffer_pool.Request(size, MemoryUsage::Download); | 773 | return staging_buffer_pool.Request(size, MemoryUsage::Download); |
| 755 | } | 774 | } |
| 756 | 775 | ||
| 776 | bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { | ||
| 777 | if (VideoCore::Surface::GetFormatType(dst.info.format) == | ||
| 778 | VideoCore::Surface::SurfaceType::DepthStencil && | ||
| 779 | !device.IsExtShaderStencilExportSupported()) { | ||
| 780 | return true; | ||
| 781 | } | ||
| 782 | if (VideoCore::Surface::GetFormatType(src.info.format) == | ||
| 783 | VideoCore::Surface::SurfaceType::DepthStencil && | ||
| 784 | !device.IsExtShaderStencilExportSupported()) { | ||
| 785 | return true; | ||
| 786 | } | ||
| 787 | if (dst.info.format == PixelFormat::D32_FLOAT_S8_UINT || | ||
| 788 | src.info.format == PixelFormat::D32_FLOAT_S8_UINT) { | ||
| 789 | return true; | ||
| 790 | } | ||
| 791 | return false; | ||
| 792 | } | ||
| 793 | |||
| 794 | VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { | ||
| 795 | const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); | ||
| 796 | if (buffer_commits[level]) { | ||
| 797 | return *buffers[level]; | ||
| 798 | } | ||
| 799 | const auto new_size = Common::NextPow2(needed_size); | ||
| 800 | static constexpr VkBufferUsageFlags flags = | ||
| 801 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||
| 802 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; | ||
| 803 | buffers[level] = device.GetLogical().CreateBuffer({ | ||
| 804 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 805 | .pNext = nullptr, | ||
| 806 | .flags = 0, | ||
| 807 | .size = new_size, | ||
| 808 | .usage = flags, | ||
| 809 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 810 | .queueFamilyIndexCount = 0, | ||
| 811 | .pQueueFamilyIndices = nullptr, | ||
| 812 | }); | ||
| 813 | buffer_commits[level] = std::make_unique<MemoryCommit>( | ||
| 814 | memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal)); | ||
| 815 | return *buffers[level]; | ||
| 816 | } | ||
| 817 | |||
| 818 | void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, | ||
| 819 | std::span<const VideoCommon::ImageCopy> copies) { | ||
| 820 | std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); | ||
| 821 | std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); | ||
| 822 | const VkImageAspectFlags src_aspect_mask = src.AspectMask(); | ||
| 823 | const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); | ||
| 824 | |||
| 825 | std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) { | ||
| 826 | return MakeBufferImageCopy(copy, true, src_aspect_mask); | ||
| 827 | }); | ||
| 828 | std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) { | ||
| 829 | return MakeBufferImageCopy(copy, false, dst_aspect_mask); | ||
| 830 | }); | ||
| 831 | const u32 img_bpp = BytesPerBlock(src.info.format); | ||
| 832 | size_t total_size = 0; | ||
| 833 | for (const auto& copy : copies) { | ||
| 834 | total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp; | ||
| 835 | } | ||
| 836 | const VkBuffer copy_buffer = GetTemporaryBuffer(total_size); | ||
| 837 | const VkImage dst_image = dst.Handle(); | ||
| 838 | const VkImage src_image = src.Handle(); | ||
| 839 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 840 | scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask, | ||
| 841 | vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) { | ||
| 842 | RangedBarrierRange dst_range; | ||
| 843 | RangedBarrierRange src_range; | ||
| 844 | for (const VkBufferImageCopy& copy : vk_in_copies) { | ||
| 845 | src_range.AddLayers(copy.imageSubresource); | ||
| 846 | } | ||
| 847 | for (const VkBufferImageCopy& copy : vk_out_copies) { | ||
| 848 | dst_range.AddLayers(copy.imageSubresource); | ||
| 849 | } | ||
| 850 | static constexpr VkMemoryBarrier READ_BARRIER{ | ||
| 851 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 852 | .pNext = nullptr, | ||
| 853 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 854 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 855 | }; | ||
| 856 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 857 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 858 | .pNext = nullptr, | ||
| 859 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 860 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 861 | }; | ||
| 862 | const std::array pre_barriers{ | ||
| 863 | VkImageMemoryBarrier{ | ||
| 864 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 865 | .pNext = nullptr, | ||
| 866 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 867 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 868 | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 869 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 870 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 871 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 872 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 873 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 874 | .image = src_image, | ||
| 875 | .subresourceRange = src_range.SubresourceRange(src_aspect_mask), | ||
| 876 | }, | ||
| 877 | }; | ||
| 878 | const std::array middle_in_barrier{ | ||
| 879 | VkImageMemoryBarrier{ | ||
| 880 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 881 | .pNext = nullptr, | ||
| 882 | .srcAccessMask = 0, | ||
| 883 | .dstAccessMask = 0, | ||
| 884 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 885 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 886 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 887 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 888 | .image = src_image, | ||
| 889 | .subresourceRange = src_range.SubresourceRange(src_aspect_mask), | ||
| 890 | }, | ||
| 891 | }; | ||
| 892 | const std::array middle_out_barrier{ | ||
| 893 | VkImageMemoryBarrier{ | ||
| 894 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 895 | .pNext = nullptr, | ||
| 896 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 897 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 898 | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 899 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 900 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 901 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 902 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 903 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 904 | .image = dst_image, | ||
| 905 | .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), | ||
| 906 | }, | ||
| 907 | }; | ||
| 908 | const std::array post_barriers{ | ||
| 909 | VkImageMemoryBarrier{ | ||
| 910 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 911 | .pNext = nullptr, | ||
| 912 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 913 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | ||
| 914 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||
| 915 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 916 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 917 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 918 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 919 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 920 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 921 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 922 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 923 | .image = dst_image, | ||
| 924 | .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), | ||
| 925 | }, | ||
| 926 | }; | ||
| 927 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 928 | 0, {}, {}, pre_barriers); | ||
| 929 | |||
| 930 | cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer, | ||
| 931 | vk_in_copies); | ||
| 932 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 933 | 0, WRITE_BARRIER, nullptr, middle_in_barrier); | ||
| 934 | |||
| 935 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 936 | 0, READ_BARRIER, {}, middle_out_barrier); | ||
| 937 | cmdbuf.CopyBufferToImage(copy_buffer, dst_image, VK_IMAGE_LAYOUT_GENERAL, vk_out_copies); | ||
| 938 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 939 | 0, {}, {}, post_barriers); | ||
| 940 | }); | ||
| 941 | } | ||
| 942 | |||
| 757 | void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 943 | void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 758 | const Region2D& dst_region, const Region2D& src_region, | 944 | const Region2D& dst_region, const Region2D& src_region, |
| 759 | Tegra::Engines::Fermi2D::Filter filter, | 945 | Tegra::Engines::Fermi2D::Filter filter, |
| @@ -881,6 +1067,11 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im | |||
| 881 | return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift); | 1067 | return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift); |
| 882 | } | 1068 | } |
| 883 | break; | 1069 | break; |
| 1070 | case PixelFormat::A8B8G8R8_UNORM: | ||
| 1071 | if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { | ||
| 1072 | return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift); | ||
| 1073 | } | ||
| 1074 | break; | ||
| 884 | case PixelFormat::R32_FLOAT: | 1075 | case PixelFormat::R32_FLOAT: |
| 885 | if (src_view.format == PixelFormat::D32_FLOAT) { | 1076 | if (src_view.format == PixelFormat::D32_FLOAT) { |
| 886 | return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); | 1077 | return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); |
| @@ -891,6 +1082,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im | |||
| 891 | return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift); | 1082 | return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift); |
| 892 | } | 1083 | } |
| 893 | break; | 1084 | break; |
| 1085 | case PixelFormat::S8_UINT_D24_UNORM: | ||
| 1086 | return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); | ||
| 1087 | break; | ||
| 894 | case PixelFormat::D32_FLOAT: | 1088 | case PixelFormat::D32_FLOAT: |
| 895 | if (src_view.format == PixelFormat::R32_FLOAT) { | 1089 | if (src_view.format == PixelFormat::R32_FLOAT) { |
| 896 | return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift); | 1090 | return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift); |
| @@ -1386,6 +1580,14 @@ VkImageView ImageView::StencilView() { | |||
| 1386 | return *stencil_view; | 1580 | return *stencil_view; |
| 1387 | } | 1581 | } |
| 1388 | 1582 | ||
| 1583 | VkImageView ImageView::ColorView() { | ||
| 1584 | if (color_view) { | ||
| 1585 | return *color_view; | ||
| 1586 | } | ||
| 1587 | color_view = MakeView(VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_ASPECT_COLOR_BIT); | ||
| 1588 | return *color_view; | ||
| 1589 | } | ||
| 1590 | |||
| 1389 | VkImageView ImageView::StorageView(Shader::TextureType texture_type, | 1591 | VkImageView ImageView::StorageView(Shader::TextureType texture_type, |
| 1390 | Shader::ImageFormat image_format) { | 1592 | Shader::ImageFormat image_format) { |
| 1391 | if (image_format == Shader::ImageFormat::Typeless) { | 1593 | if (image_format == Shader::ImageFormat::Typeless) { |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f5f8f9a74..753e3e8a1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -61,6 +61,10 @@ public: | |||
| 61 | 61 | ||
| 62 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | 62 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); |
| 63 | 63 | ||
| 64 | bool ShouldReinterpret(Image& dst, Image& src); | ||
| 65 | |||
| 66 | void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||
| 67 | |||
| 64 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); | 68 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); |
| 65 | 69 | ||
| 66 | bool CanAccelerateImageUpload(Image&) const noexcept { | 70 | bool CanAccelerateImageUpload(Image&) const noexcept { |
| @@ -82,6 +86,8 @@ public: | |||
| 82 | return true; | 86 | return true; |
| 83 | } | 87 | } |
| 84 | 88 | ||
| 89 | [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); | ||
| 90 | |||
| 85 | const Device& device; | 91 | const Device& device; |
| 86 | VKScheduler& scheduler; | 92 | VKScheduler& scheduler; |
| 87 | MemoryAllocator& memory_allocator; | 93 | MemoryAllocator& memory_allocator; |
| @@ -90,6 +96,10 @@ public: | |||
| 90 | ASTCDecoderPass& astc_decoder_pass; | 96 | ASTCDecoderPass& astc_decoder_pass; |
| 91 | RenderPassCache& render_pass_cache; | 97 | RenderPassCache& render_pass_cache; |
| 92 | const Settings::ResolutionScalingInfo& resolution; | 98 | const Settings::ResolutionScalingInfo& resolution; |
| 99 | |||
| 100 | constexpr static size_t indexing_slots = 8 * sizeof(size_t); | ||
| 101 | std::array<vk::Buffer, indexing_slots> buffers{}; | ||
| 102 | std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{}; | ||
| 93 | }; | 103 | }; |
| 94 | 104 | ||
| 95 | class Image : public VideoCommon::ImageBase { | 105 | class Image : public VideoCommon::ImageBase { |
| @@ -174,6 +184,8 @@ public: | |||
| 174 | 184 | ||
| 175 | [[nodiscard]] VkImageView StencilView(); | 185 | [[nodiscard]] VkImageView StencilView(); |
| 176 | 186 | ||
| 187 | [[nodiscard]] VkImageView ColorView(); | ||
| 188 | |||
| 177 | [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, | 189 | [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, |
| 178 | Shader::ImageFormat image_format); | 190 | Shader::ImageFormat image_format); |
| 179 | 191 | ||
| @@ -214,6 +226,7 @@ private: | |||
| 214 | std::unique_ptr<StorageViews> storage_views; | 226 | std::unique_ptr<StorageViews> storage_views; |
| 215 | vk::ImageView depth_view; | 227 | vk::ImageView depth_view; |
| 216 | vk::ImageView stencil_view; | 228 | vk::ImageView stencil_view; |
| 229 | vk::ImageView color_view; | ||
| 217 | VkImage image_handle = VK_NULL_HANDLE; | 230 | VkImage image_handle = VK_NULL_HANDLE; |
| 218 | VkImageView render_target = VK_NULL_HANDLE; | 231 | VkImageView render_target = VK_NULL_HANDLE; |
| 219 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; | 232 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; |
| @@ -316,7 +329,6 @@ struct TextureCacheParams { | |||
| 316 | static constexpr bool FRAMEBUFFER_BLITS = false; | 329 | static constexpr bool FRAMEBUFFER_BLITS = false; |
| 317 | static constexpr bool HAS_EMULATED_COPIES = false; | 330 | static constexpr bool HAS_EMULATED_COPIES = false; |
| 318 | static constexpr bool HAS_DEVICE_MEMORY_INFO = true; | 331 | static constexpr bool HAS_DEVICE_MEMORY_INFO = true; |
| 319 | static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false; | ||
| 320 | 332 | ||
| 321 | using Runtime = Vulkan::TextureCacheRuntime; | 333 | using Runtime = Vulkan::TextureCacheRuntime; |
| 322 | using Image = Vulkan::Image; | 334 | using Image = Vulkan::Image; |
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index dc6995b46..bcaf5f575 100644 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp | |||
| @@ -18,7 +18,7 @@ int ShaderNotify::ShadersBuilding() noexcept { | |||
| 18 | const int now_complete = num_complete.load(std::memory_order::relaxed); | 18 | const int now_complete = num_complete.load(std::memory_order::relaxed); |
| 19 | const int now_building = num_building.load(std::memory_order::relaxed); | 19 | const int now_building = num_building.load(std::memory_order::relaxed); |
| 20 | if (now_complete == now_building) { | 20 | if (now_complete == now_building) { |
| 21 | const auto now = std::chrono::high_resolution_clock::now(); | 21 | const auto now = std::chrono::steady_clock::now(); |
| 22 | if (completed && num_complete == num_when_completed) { | 22 | if (completed && num_complete == num_when_completed) { |
| 23 | if (now - complete_time > TIME_TO_STOP_REPORTING) { | 23 | if (now - complete_time > TIME_TO_STOP_REPORTING) { |
| 24 | report_base = now_complete; | 24 | report_base = now_complete; |
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h index ad363bfb5..4d8d52071 100644 --- a/src/video_core/shader_notify.h +++ b/src/video_core/shader_notify.h | |||
| @@ -28,6 +28,6 @@ private: | |||
| 28 | 28 | ||
| 29 | bool completed{}; | 29 | bool completed{}; |
| 30 | int num_when_completed{}; | 30 | int num_when_completed{}; |
| 31 | std::chrono::high_resolution_clock::time_point complete_time; | 31 | std::chrono::steady_clock::time_point complete_time; |
| 32 | }; | 32 | }; |
| 33 | } // namespace VideoCore | 33 | } // namespace VideoCore |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 58d262446..a36015c8c 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -82,6 +82,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { | |||
| 82 | return PixelFormat::D32_FLOAT; | 82 | return PixelFormat::D32_FLOAT; |
| 83 | case Tegra::DepthFormat::D16_UNORM: | 83 | case Tegra::DepthFormat::D16_UNORM: |
| 84 | return PixelFormat::D16_UNORM; | 84 | return PixelFormat::D16_UNORM; |
| 85 | case Tegra::DepthFormat::S8_UINT: | ||
| 86 | return PixelFormat::S8_UINT; | ||
| 85 | case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT: | 87 | case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT: |
| 86 | return PixelFormat::D32_FLOAT_S8_UINT; | 88 | return PixelFormat::D32_FLOAT_S8_UINT; |
| 87 | default: | 89 | default: |
| @@ -214,6 +216,11 @@ SurfaceType GetFormatType(PixelFormat pixel_format) { | |||
| 214 | } | 216 | } |
| 215 | 217 | ||
| 216 | if (static_cast<std::size_t>(pixel_format) < | 218 | if (static_cast<std::size_t>(pixel_format) < |
| 219 | static_cast<std::size_t>(PixelFormat::MaxStencilFormat)) { | ||
| 220 | return SurfaceType::Stencil; | ||
| 221 | } | ||
| 222 | |||
| 223 | if (static_cast<std::size_t>(pixel_format) < | ||
| 217 | static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) { | 224 | static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) { |
| 218 | return SurfaceType::DepthStencil; | 225 | return SurfaceType::DepthStencil; |
| 219 | } | 226 | } |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 2ce7c7d33..33e8d24ab 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -110,8 +110,12 @@ enum class PixelFormat { | |||
| 110 | 110 | ||
| 111 | MaxDepthFormat, | 111 | MaxDepthFormat, |
| 112 | 112 | ||
| 113 | // Stencil formats | ||
| 114 | S8_UINT = MaxDepthFormat, | ||
| 115 | MaxStencilFormat, | ||
| 116 | |||
| 113 | // DepthStencil formats | 117 | // DepthStencil formats |
| 114 | D24_UNORM_S8_UINT = MaxDepthFormat, | 118 | D24_UNORM_S8_UINT = MaxStencilFormat, |
| 115 | S8_UINT_D24_UNORM, | 119 | S8_UINT_D24_UNORM, |
| 116 | D32_FLOAT_S8_UINT, | 120 | D32_FLOAT_S8_UINT, |
| 117 | 121 | ||
| @@ -125,8 +129,9 @@ constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max | |||
| 125 | enum class SurfaceType { | 129 | enum class SurfaceType { |
| 126 | ColorTexture = 0, | 130 | ColorTexture = 0, |
| 127 | Depth = 1, | 131 | Depth = 1, |
| 128 | DepthStencil = 2, | 132 | Stencil = 2, |
| 129 | Invalid = 3, | 133 | DepthStencil = 3, |
| 134 | Invalid = 4, | ||
| 130 | }; | 135 | }; |
| 131 | 136 | ||
| 132 | enum class SurfaceTarget { | 137 | enum class SurfaceTarget { |
| @@ -229,6 +234,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{ | |||
| 229 | 1, // E5B9G9R9_FLOAT | 234 | 1, // E5B9G9R9_FLOAT |
| 230 | 1, // D32_FLOAT | 235 | 1, // D32_FLOAT |
| 231 | 1, // D16_UNORM | 236 | 1, // D16_UNORM |
| 237 | 1, // S8_UINT | ||
| 232 | 1, // D24_UNORM_S8_UINT | 238 | 1, // D24_UNORM_S8_UINT |
| 233 | 1, // S8_UINT_D24_UNORM | 239 | 1, // S8_UINT_D24_UNORM |
| 234 | 1, // D32_FLOAT_S8_UINT | 240 | 1, // D32_FLOAT_S8_UINT |
| @@ -328,6 +334,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{ | |||
| 328 | 1, // E5B9G9R9_FLOAT | 334 | 1, // E5B9G9R9_FLOAT |
| 329 | 1, // D32_FLOAT | 335 | 1, // D32_FLOAT |
| 330 | 1, // D16_UNORM | 336 | 1, // D16_UNORM |
| 337 | 1, // S8_UINT | ||
| 331 | 1, // D24_UNORM_S8_UINT | 338 | 1, // D24_UNORM_S8_UINT |
| 332 | 1, // S8_UINT_D24_UNORM | 339 | 1, // S8_UINT_D24_UNORM |
| 333 | 1, // D32_FLOAT_S8_UINT | 340 | 1, // D32_FLOAT_S8_UINT |
| @@ -427,6 +434,7 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{ | |||
| 427 | 32, // E5B9G9R9_FLOAT | 434 | 32, // E5B9G9R9_FLOAT |
| 428 | 32, // D32_FLOAT | 435 | 32, // D32_FLOAT |
| 429 | 16, // D16_UNORM | 436 | 16, // D16_UNORM |
| 437 | 8, // S8_UINT | ||
| 430 | 32, // D24_UNORM_S8_UINT | 438 | 32, // D24_UNORM_S8_UINT |
| 431 | 32, // S8_UINT_D24_UNORM | 439 | 32, // S8_UINT_D24_UNORM |
| 432 | 64, // D32_FLOAT_S8_UINT | 440 | 64, // D32_FLOAT_S8_UINT |
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index ddfb726fe..afa807d5d 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -139,6 +139,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, | |||
| 139 | return PixelFormat::D16_UNORM; | 139 | return PixelFormat::D16_UNORM; |
| 140 | case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): | 140 | case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): |
| 141 | return PixelFormat::S8_UINT_D24_UNORM; | 141 | return PixelFormat::S8_UINT_D24_UNORM; |
| 142 | case Hash(TextureFormat::S8D24, UINT, UNORM, UINT, UINT, LINEAR): | ||
| 143 | return PixelFormat::S8_UINT_D24_UNORM; | ||
| 142 | case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): | 144 | case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): |
| 143 | return PixelFormat::S8_UINT_D24_UNORM; | 145 | return PixelFormat::S8_UINT_D24_UNORM; |
| 144 | case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): | 146 | case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): |
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index c6cf0583f..b2c81057b 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h | |||
| @@ -194,6 +194,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str | |||
| 194 | return "D32_FLOAT"; | 194 | return "D32_FLOAT"; |
| 195 | case PixelFormat::D16_UNORM: | 195 | case PixelFormat::D16_UNORM: |
| 196 | return "D16_UNORM"; | 196 | return "D16_UNORM"; |
| 197 | case PixelFormat::S8_UINT: | ||
| 198 | return "S8_UINT"; | ||
| 197 | case PixelFormat::D24_UNORM_S8_UINT: | 199 | case PixelFormat::D24_UNORM_S8_UINT: |
| 198 | return "D24_UNORM_S8_UINT"; | 200 | return "D24_UNORM_S8_UINT"; |
| 199 | case PixelFormat::S8_UINT_D24_UNORM: | 201 | case PixelFormat::S8_UINT_D24_UNORM: |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 241f71a91..5aaeb16ca 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -472,9 +472,10 @@ template <class P> | |||
| 472 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | 472 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 473 | const Tegra::Engines::Fermi2D::Surface& src, | 473 | const Tegra::Engines::Fermi2D::Surface& src, |
| 474 | const Tegra::Engines::Fermi2D::Config& copy) { | 474 | const Tegra::Engines::Fermi2D::Config& copy) { |
| 475 | const BlitImages images = GetBlitImages(dst, src); | 475 | const BlitImages images = GetBlitImages(dst, src, copy); |
| 476 | const ImageId dst_id = images.dst_id; | 476 | const ImageId dst_id = images.dst_id; |
| 477 | const ImageId src_id = images.src_id; | 477 | const ImageId src_id = images.src_id; |
| 478 | |||
| 478 | PrepareImage(src_id, false, false); | 479 | PrepareImage(src_id, false, false); |
| 479 | PrepareImage(dst_id, true, false); | 480 | PrepareImage(dst_id, true, false); |
| 480 | 481 | ||
| @@ -758,14 +759,18 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||
| 758 | return ImageId{}; | 759 | return ImageId{}; |
| 759 | } | 760 | } |
| 760 | } | 761 | } |
| 761 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | 762 | const bool broken_views = |
| 763 | runtime.HasBrokenTextureViewFormats() || True(options & RelaxedOptions::ForceBrokenViews); | ||
| 762 | const bool native_bgr = runtime.HasNativeBgr(); | 764 | const bool native_bgr = runtime.HasNativeBgr(); |
| 763 | ImageId image_id; | 765 | const bool flexible_formats = True(options & RelaxedOptions::Format); |
| 766 | ImageId image_id{}; | ||
| 767 | boost::container::small_vector<ImageId, 1> image_ids; | ||
| 764 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | 768 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 765 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | 769 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { |
| 766 | return false; | 770 | return false; |
| 767 | } | 771 | } |
| 768 | if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { | 772 | if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) |
| 773 | [[unlikely]] { | ||
| 769 | const bool strict_size = False(options & RelaxedOptions::Size) && | 774 | const bool strict_size = False(options & RelaxedOptions::Size) && |
| 770 | True(existing_image.flags & ImageFlagBits::Strong); | 775 | True(existing_image.flags & ImageFlagBits::Strong); |
| 771 | const ImageInfo& existing = existing_image.info; | 776 | const ImageInfo& existing = existing_image.info; |
| @@ -774,17 +779,27 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||
| 774 | IsPitchLinearSameSize(existing, info, strict_size) && | 779 | IsPitchLinearSameSize(existing, info, strict_size) && |
| 775 | IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { | 780 | IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { |
| 776 | image_id = existing_image_id; | 781 | image_id = existing_image_id; |
| 777 | return true; | 782 | image_ids.push_back(existing_image_id); |
| 783 | return !flexible_formats && existing.format == info.format; | ||
| 778 | } | 784 | } |
| 779 | } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, | 785 | } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, |
| 780 | native_bgr)) { | 786 | native_bgr)) { |
| 781 | image_id = existing_image_id; | 787 | image_id = existing_image_id; |
| 782 | return true; | 788 | image_ids.push_back(existing_image_id); |
| 789 | return !flexible_formats && existing_image.info.format == info.format; | ||
| 783 | } | 790 | } |
| 784 | return false; | 791 | return false; |
| 785 | }; | 792 | }; |
| 786 | ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); | 793 | ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); |
| 787 | return image_id; | 794 | if (image_ids.size() <= 1) [[likely]] { |
| 795 | return image_id; | ||
| 796 | } | ||
| 797 | auto image_ids_compare = [this](ImageId a, ImageId b) { | ||
| 798 | auto& image_a = slot_images[a]; | ||
| 799 | auto& image_b = slot_images[b]; | ||
| 800 | return image_a.modification_tick < image_b.modification_tick; | ||
| 801 | }; | ||
| 802 | return *std::ranges::max_element(image_ids, image_ids_compare); | ||
| 788 | } | 803 | } |
| 789 | 804 | ||
| 790 | template <class P> | 805 | template <class P> |
| @@ -1076,31 +1091,66 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1076 | 1091 | ||
| 1077 | template <class P> | 1092 | template <class P> |
| 1078 | typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( | 1093 | typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( |
| 1079 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { | 1094 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, |
| 1080 | static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; | 1095 | const Tegra::Engines::Fermi2D::Config& copy) { |
| 1096 | |||
| 1097 | static constexpr auto FIND_OPTIONS = RelaxedOptions::Samples; | ||
| 1081 | const GPUVAddr dst_addr = dst.Address(); | 1098 | const GPUVAddr dst_addr = dst.Address(); |
| 1082 | const GPUVAddr src_addr = src.Address(); | 1099 | const GPUVAddr src_addr = src.Address(); |
| 1083 | ImageInfo dst_info(dst); | 1100 | ImageInfo dst_info(dst); |
| 1084 | ImageInfo src_info(src); | 1101 | ImageInfo src_info(src); |
| 1102 | const bool can_be_depth_blit = | ||
| 1103 | dst_info.format == src_info.format && copy.filter == Tegra::Engines::Fermi2D::Filter::Point; | ||
| 1085 | ImageId dst_id; | 1104 | ImageId dst_id; |
| 1086 | ImageId src_id; | 1105 | ImageId src_id; |
| 1106 | RelaxedOptions try_options = FIND_OPTIONS; | ||
| 1107 | if (can_be_depth_blit) { | ||
| 1108 | try_options |= RelaxedOptions::Format; | ||
| 1109 | } | ||
| 1087 | do { | 1110 | do { |
| 1088 | has_deleted_images = false; | 1111 | has_deleted_images = false; |
| 1089 | dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); | 1112 | src_id = FindImage(src_info, src_addr, try_options); |
| 1090 | src_id = FindImage(src_info, src_addr, FIND_OPTIONS); | 1113 | dst_id = FindImage(dst_info, dst_addr, try_options); |
| 1091 | const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; | ||
| 1092 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; | 1114 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; |
| 1093 | DeduceBlitImages(dst_info, src_info, dst_image, src_image); | 1115 | if (src_image && src_image->info.num_samples > 1) { |
| 1094 | if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { | 1116 | RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; |
| 1095 | continue; | 1117 | src_id = FindOrInsertImage(src_info, src_addr, find_options); |
| 1118 | dst_id = FindOrInsertImage(dst_info, dst_addr, find_options); | ||
| 1119 | if (has_deleted_images) { | ||
| 1120 | continue; | ||
| 1121 | } | ||
| 1122 | break; | ||
| 1096 | } | 1123 | } |
| 1097 | if (!dst_id) { | 1124 | if (can_be_depth_blit) { |
| 1098 | dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); | 1125 | const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; |
| 1126 | DeduceBlitImages(dst_info, src_info, dst_image, src_image); | ||
| 1127 | if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { | ||
| 1128 | continue; | ||
| 1129 | } | ||
| 1099 | } | 1130 | } |
| 1100 | if (!src_id) { | 1131 | if (!src_id) { |
| 1101 | src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); | 1132 | src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); |
| 1102 | } | 1133 | } |
| 1134 | if (!dst_id) { | ||
| 1135 | dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); | ||
| 1136 | } | ||
| 1103 | } while (has_deleted_images); | 1137 | } while (has_deleted_images); |
| 1138 | const ImageBase& src_image = slot_images[src_id]; | ||
| 1139 | const ImageBase& dst_image = slot_images[dst_id]; | ||
| 1140 | const bool native_bgr = runtime.HasNativeBgr(); | ||
| 1141 | if (GetFormatType(dst_info.format) != GetFormatType(dst_image.info.format) || | ||
| 1142 | GetFormatType(src_info.format) != GetFormatType(src_image.info.format) || | ||
| 1143 | !VideoCore::Surface::IsViewCompatible(dst_info.format, dst_image.info.format, false, | ||
| 1144 | native_bgr) || | ||
| 1145 | !VideoCore::Surface::IsViewCompatible(src_info.format, src_image.info.format, false, | ||
| 1146 | native_bgr)) { | ||
| 1147 | // Make sure the images match the expected format. | ||
| 1148 | do { | ||
| 1149 | has_deleted_images = false; | ||
| 1150 | src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{}); | ||
| 1151 | dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); | ||
| 1152 | } while (has_deleted_images); | ||
| 1153 | } | ||
| 1104 | return BlitImages{ | 1154 | return BlitImages{ |
| 1105 | .dst_id = dst_id, | 1155 | .dst_id = dst_id, |
| 1106 | .src_id = src_id, | 1156 | .src_id = src_id, |
| @@ -1157,7 +1207,14 @@ template <class P> | |||
| 1157 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, | 1207 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1158 | bool is_clear) { | 1208 | bool is_clear) { |
| 1159 | const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; | 1209 | const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; |
| 1160 | const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); | 1210 | ImageId image_id{}; |
| 1211 | bool delete_state = has_deleted_images; | ||
| 1212 | do { | ||
| 1213 | has_deleted_images = false; | ||
| 1214 | image_id = FindOrInsertImage(info, gpu_addr, options); | ||
| 1215 | delete_state |= has_deleted_images; | ||
| 1216 | } while (has_deleted_images); | ||
| 1217 | has_deleted_images = delete_state; | ||
| 1161 | if (!image_id) { | 1218 | if (!image_id) { |
| 1162 | return NULL_IMAGE_VIEW_ID; | 1219 | return NULL_IMAGE_VIEW_ID; |
| 1163 | } | 1220 | } |
| @@ -1759,8 +1816,8 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag | |||
| 1759 | } | 1816 | } |
| 1760 | UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); | 1817 | UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); |
| 1761 | UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); | 1818 | UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); |
| 1762 | if constexpr (HAS_PIXEL_FORMAT_CONVERSIONS) { | 1819 | if (runtime.ShouldReinterpret(dst, src)) { |
| 1763 | return runtime.ConvertImage(dst, src, copies); | 1820 | return runtime.ReinterpretImage(dst, src, copies); |
| 1764 | } | 1821 | } |
| 1765 | for (const ImageCopy& copy : copies) { | 1822 | for (const ImageCopy& copy : copies) { |
| 1766 | UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); | 1823 | UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); |
| @@ -1780,7 +1837,13 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag | |||
| 1780 | const SubresourceExtent src_extent{.levels = 1, .layers = 1}; | 1837 | const SubresourceExtent src_extent{.levels = 1, .layers = 1}; |
| 1781 | const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; | 1838 | const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; |
| 1782 | const SubresourceRange src_range{.base = src_base, .extent = src_extent}; | 1839 | const SubresourceRange src_range{.base = src_base, .extent = src_extent}; |
| 1783 | const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); | 1840 | PixelFormat dst_format = dst.info.format; |
| 1841 | if (GetFormatType(src.info.format) == SurfaceType::DepthStencil && | ||
| 1842 | GetFormatType(dst_format) == SurfaceType::ColorTexture && | ||
| 1843 | BytesPerBlock(dst_format) == 4) { | ||
| 1844 | dst_format = PixelFormat::A8B8G8R8_UNORM; | ||
| 1845 | } | ||
| 1846 | const ImageViewInfo dst_view_info(ImageViewType::e2D, dst_format, dst_range); | ||
| 1784 | const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); | 1847 | const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); |
| 1785 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | 1848 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); |
| 1786 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; | 1849 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index a9504c0e8..7107887a6 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -59,8 +59,6 @@ class TextureCache { | |||
| 59 | static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; | 59 | static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; |
| 60 | /// True when the API can provide info about the memory of the device. | 60 | /// True when the API can provide info about the memory of the device. |
| 61 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | 61 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; |
| 62 | /// True when the API provides utilities for pixel format conversions. | ||
| 63 | static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = P::HAS_PIXEL_FORMAT_CONVERSIONS; | ||
| 64 | 62 | ||
| 65 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; | 63 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; |
| 66 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; | 64 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; |
| @@ -254,7 +252,8 @@ private: | |||
| 254 | 252 | ||
| 255 | /// Return a blit image pair from the given guest blit parameters | 253 | /// Return a blit image pair from the given guest blit parameters |
| 256 | [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, | 254 | [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, |
| 257 | const Tegra::Engines::Fermi2D::Surface& src); | 255 | const Tegra::Engines::Fermi2D::Surface& src, |
| 256 | const Tegra::Engines::Fermi2D::Config& copy); | ||
| 258 | 257 | ||
| 259 | /// Find or create a sampler from a guest descriptor sampler | 258 | /// Find or create a sampler from a guest descriptor sampler |
| 260 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); | 259 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); |
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 5c274abdf..5ac27b3a7 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h | |||
| @@ -54,6 +54,7 @@ enum class RelaxedOptions : u32 { | |||
| 54 | Size = 1 << 0, | 54 | Size = 1 << 0, |
| 55 | Format = 1 << 1, | 55 | Format = 1 << 1, |
| 56 | Samples = 1 << 2, | 56 | Samples = 1 << 2, |
| 57 | ForceBrokenViews = 1 << 3, | ||
| 57 | }; | 58 | }; |
| 58 | DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) | 59 | DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) |
| 59 | 60 | ||
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index ddc9fb13a..7bd31b211 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -1151,6 +1151,7 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr | |||
| 1151 | 1151 | ||
| 1152 | void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | 1152 | void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, |
| 1153 | const ImageBase* src) { | 1153 | const ImageBase* src) { |
| 1154 | const auto original_dst_format = dst_info.format; | ||
| 1154 | if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { | 1155 | if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { |
| 1155 | src_info.format = src->info.format; | 1156 | src_info.format = src->info.format; |
| 1156 | } | 1157 | } |
| @@ -1161,7 +1162,13 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* | |||
| 1161 | dst_info.format = src->info.format; | 1162 | dst_info.format = src->info.format; |
| 1162 | } | 1163 | } |
| 1163 | if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { | 1164 | if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { |
| 1164 | src_info.format = dst->info.format; | 1165 | if (src) { |
| 1166 | if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) { | ||
| 1167 | dst_info.format = original_dst_format; | ||
| 1168 | } | ||
| 1169 | } else { | ||
| 1170 | src_info.format = dst->info.format; | ||
| 1171 | } | ||
| 1165 | } | 1172 | } |
| 1166 | } | 1173 | } |
| 1167 | 1174 | ||
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index e852c817e..329bf4def 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -55,10 +55,4 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor | |||
| 55 | } | 55 | } |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | float GetResolutionScaleFactor(const RendererBase& renderer) { | ||
| 59 | return Settings::values.resolution_info.active | ||
| 60 | ? Settings::values.resolution_info.up_factor | ||
| 61 | : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio(); | ||
| 62 | } | ||
| 63 | |||
| 64 | } // namespace VideoCore | 58 | } // namespace VideoCore |
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index f86877e86..084df641f 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h | |||
| @@ -25,6 +25,4 @@ class RendererBase; | |||
| 25 | /// Creates an emulated GPU instance using the given system context. | 25 | /// Creates an emulated GPU instance using the given system context. |
| 26 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); | 26 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); |
| 27 | 27 | ||
| 28 | float GetResolutionScaleFactor(const RendererBase& renderer); | ||
| 29 | |||
| 30 | } // namespace VideoCore | 28 | } // namespace VideoCore |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 95106f88f..7bf5b6578 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -21,6 +21,13 @@ | |||
| 21 | namespace Vulkan { | 21 | namespace Vulkan { |
| 22 | namespace { | 22 | namespace { |
| 23 | namespace Alternatives { | 23 | namespace Alternatives { |
| 24 | constexpr std::array STENCIL8_UINT{ | ||
| 25 | VK_FORMAT_D16_UNORM_S8_UINT, | ||
| 26 | VK_FORMAT_D24_UNORM_S8_UINT, | ||
| 27 | VK_FORMAT_D32_SFLOAT_S8_UINT, | ||
| 28 | VK_FORMAT_UNDEFINED, | ||
| 29 | }; | ||
| 30 | |||
| 24 | constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{ | 31 | constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{ |
| 25 | VK_FORMAT_D32_SFLOAT_S8_UINT, | 32 | VK_FORMAT_D32_SFLOAT_S8_UINT, |
| 26 | VK_FORMAT_D16_UNORM_S8_UINT, | 33 | VK_FORMAT_D16_UNORM_S8_UINT, |
| @@ -74,6 +81,8 @@ void SetNext(void**& next, T& data) { | |||
| 74 | 81 | ||
| 75 | constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { | 82 | constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { |
| 76 | switch (format) { | 83 | switch (format) { |
| 84 | case VK_FORMAT_S8_UINT: | ||
| 85 | return Alternatives::STENCIL8_UINT.data(); | ||
| 77 | case VK_FORMAT_D24_UNORM_S8_UINT: | 86 | case VK_FORMAT_D24_UNORM_S8_UINT: |
| 78 | return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data(); | 87 | return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data(); |
| 79 | case VK_FORMAT_D16_UNORM_S8_UINT: | 88 | case VK_FORMAT_D16_UNORM_S8_UINT: |
| @@ -121,6 +130,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica | |||
| 121 | VK_FORMAT_R16G16_UNORM, | 130 | VK_FORMAT_R16G16_UNORM, |
| 122 | VK_FORMAT_R16G16_SNORM, | 131 | VK_FORMAT_R16G16_SNORM, |
| 123 | VK_FORMAT_R16G16_SFLOAT, | 132 | VK_FORMAT_R16G16_SFLOAT, |
| 133 | VK_FORMAT_R16G16_UINT, | ||
| 124 | VK_FORMAT_R16G16_SINT, | 134 | VK_FORMAT_R16G16_SINT, |
| 125 | VK_FORMAT_R16_UNORM, | 135 | VK_FORMAT_R16_UNORM, |
| 126 | VK_FORMAT_R16_SNORM, | 136 | VK_FORMAT_R16_SNORM, |
| @@ -145,6 +155,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica | |||
| 145 | VK_FORMAT_R4G4B4A4_UNORM_PACK16, | 155 | VK_FORMAT_R4G4B4A4_UNORM_PACK16, |
| 146 | VK_FORMAT_D32_SFLOAT, | 156 | VK_FORMAT_D32_SFLOAT, |
| 147 | VK_FORMAT_D16_UNORM, | 157 | VK_FORMAT_D16_UNORM, |
| 158 | VK_FORMAT_S8_UINT, | ||
| 148 | VK_FORMAT_D16_UNORM_S8_UINT, | 159 | VK_FORMAT_D16_UNORM_S8_UINT, |
| 149 | VK_FORMAT_D24_UNORM_S8_UINT, | 160 | VK_FORMAT_D24_UNORM_S8_UINT, |
| 150 | VK_FORMAT_D32_SFLOAT_S8_UINT, | 161 | VK_FORMAT_D32_SFLOAT_S8_UINT, |