summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Marshall Mohror2021-10-23 00:11:12 -0500
committerGravatar Fernando Sahmkow2021-11-16 22:11:33 +0100
commitbb0367548591a66cd963b14373ed5b9c61adabb1 (patch)
tree988222e55a86b1023678f68b42617f55648c63d7 /src
parentvk_blit_screen: Fix AA destruction order (diff)
downloadyuzu-bb0367548591a66cd963b14373ed5b9c61adabb1.tar.gz
yuzu-bb0367548591a66cd963b14373ed5b9c61adabb1.tar.xz
yuzu-bb0367548591a66cd963b14373ed5b9c61adabb1.zip
Vulkan: Reimplement FSR constant generation functions to avoid GCC warnings
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.cpp153
2 files changed, 145 insertions, 9 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 07b94dcc8..91a30fef7 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -237,7 +237,6 @@ target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
237target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) 237target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
238target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) 238target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
239 239
240target_link_libraries(video_core PRIVATE ffx-fsr)
241add_dependencies(video_core host_shaders) 240add_dependencies(video_core host_shaders)
242target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) 241target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
243target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) 242target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index 9288aa7c2..2feaa9d37 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -13,9 +13,146 @@
13#include "video_core/renderer_vulkan/vk_shader_util.h" 13#include "video_core/renderer_vulkan/vk_shader_util.h"
14#include "video_core/vulkan_common/vulkan_device.h" 14#include "video_core/vulkan_common/vulkan_device.h"
15 15
16#define A_CPU 16// Reimplementations of the constant generating functions in ffx_fsr1.h
17#include <ffx_a.h> 17// GCC generated a lot of warnings when using the official header.
18#include <ffx_fsr1.h> 18static u32 AU1_AH1_AF1(f32 f) {
19 static constexpr u32 base[512]{
20 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
21 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
22 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
23 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
24 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
25 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
26 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
27 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
28 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
29 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040,
30 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
31 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
32 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
33 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
34 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
35 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
36 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
37 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
38 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
39 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
40 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
41 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
42 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
43 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
44 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
45 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
46 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
47 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
48 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
49 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
50 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
51 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
52 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008,
53 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
54 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
55 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
56 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
57 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
58 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
59 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
60 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
61 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
62 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
63 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
64 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
65 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
66 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
67 };
68 static constexpr s8 shift[512]{
69 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
70 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
71 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
72 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
73 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
74 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
75 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16,
76 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
77 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
78 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
79 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
80 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
81 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
82 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
83 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
84 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
85 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
86 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
87 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
88 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
89 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
90 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
91 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
92 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
93 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
94 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
95 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
96 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
97 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
98 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
99 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
100 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
101 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
102 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
103 0x18, 0x18,
104 };
105 auto u = std::bit_cast<u32>(f);
106 u32 i = u >> 23;
107 return base[i] + ((u & 0x7fffff) >> shift[i]);
108}
109
110static u32 AU1_AH2_AF2(f32 a[2]) {
111 return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16);
112}
113
114static void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
115 f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
116 f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX,
117 f32 outputSizeInPixelsY) {
118 con0[0] = std::bit_cast<u32>(inputViewportInPixelsX / outputSizeInPixelsX);
119 con0[1] = std::bit_cast<u32>(inputViewportInPixelsY / outputSizeInPixelsY);
120 con0[2] = std::bit_cast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f);
121 con0[3] = std::bit_cast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f);
122 con1[0] = std::bit_cast<u32>(1.0f / inputSizeInPixelsX);
123 con1[1] = std::bit_cast<u32>(1.0f / inputSizeInPixelsY);
124 con1[2] = std::bit_cast<u32>(1.0f / inputSizeInPixelsX);
125 con1[3] = std::bit_cast<u32>(-1.0f / inputSizeInPixelsY);
126 con2[0] = std::bit_cast<u32>(-1.0f / inputSizeInPixelsX);
127 con2[1] = std::bit_cast<u32>(2.0f / inputSizeInPixelsY);
128 con2[2] = std::bit_cast<u32>(1.0f / inputSizeInPixelsX);
129 con2[3] = std::bit_cast<u32>(2.0f / inputSizeInPixelsY);
130 con3[0] = std::bit_cast<u32>(0.0f / inputSizeInPixelsX);
131 con3[1] = std::bit_cast<u32>(4.0f / inputSizeInPixelsY);
132 con3[2] = con3[3] = 0;
133}
134
135static void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
136 f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
137 f32 inputSizeInPixelsX, f32 inputSizeInPixelsY,
138 f32 outputSizeInPixelsX, f32 outputSizeInPixelsY,
139 f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) {
140 FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY,
141 inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY);
142 con0[2] = std::bit_cast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f +
143 inputOffsetInPixelsX);
144 con0[3] = std::bit_cast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f +
145 inputOffsetInPixelsY);
146}
147
148static void FsrRcasCon(u32* con, f32 sharpness) {
149 sharpness = std::exp2f(-sharpness);
150 f32 hSharp[2]{sharpness, sharpness};
151 con[0] = std::bit_cast<u32>(sharpness);
152 con[1] = AU1_AH2_AF2(hSharp);
153 con[2] = 0;
154 con[3] = 0;
155}
19 156
20namespace Vulkan { 157namespace Vulkan {
21 158
@@ -62,15 +199,15 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im
62 199
63 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); 200 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
64 201
65 std::array<AU1, 4 * 4> push_constants; 202 std::array<u32, 4 * 4> push_constants;
66 FsrEasuConOffset( 203 FsrEasuConOffset(
67 push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, 204 push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
68 push_constants.data() + 12, 205 push_constants.data() + 12,
69 206
70 static_cast<AF1>(crop_rect.GetWidth()), static_cast<AF1>(crop_rect.GetHeight()), 207 static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
71 static_cast<AF1>(input_image_extent.width), static_cast<AF1>(input_image_extent.height), 208 static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height),
72 static_cast<AF1>(output_size.width), static_cast<AF1>(output_size.height), 209 static_cast<f32>(output_size.width), static_cast<f32>(output_size.height),
73 static_cast<AF1>(crop_rect.left), static_cast<AF1>(crop_rect.top)); 210 static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
74 cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); 211 cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
75 212
76 { 213 {