summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Marshall Mohror, 2021-10-22 23:09:29 -0500
committer: Fernando Sahmkow, 2021-11-16 22:11:32 +0100
commit: dcc5b4f6b005a2c89bb4e77bca4cfe8705734021 (patch)
tree: 8f18cc5669dfcd150e3b97eca7acb91a0738b4ba /src
parent: renderer_vulkan/blit_image: Use generic color state on Depth to Color blits (diff)
download: yuzu-dcc5b4f6b005a2c89bb4e77bca4cfe8705734021.tar.gz
          yuzu-dcc5b4f6b005a2c89bb4e77bca4cfe8705734021.tar.xz
          yuzu-dcc5b4f6b005a2c89bb4e77bca4cfe8705734021.zip
Presentation: Only use FP16 in scaling shaders on supported devices in Vulkan
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/CMakeLists.txt | 1
-rw-r--r-- src/video_core/host_shaders/CMakeLists.txt | 10
-rw-r--r-- src/video_core/host_shaders/fidelityfx_fsr.comp | 106
-rw-r--r-- src/video_core/host_shaders/opengl_present_scaleforce.frag (renamed from src/video_core/host_shaders/present_scaleforce.frag) | 86
-rw-r--r-- src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp (renamed from src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp) | 4
-rw-r--r-- src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp | 10
-rw-r--r-- src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp (renamed from src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp) | 4
-rw-r--r-- src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp | 10
-rw-r--r-- src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag | 7
-rw-r--r-- src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag | 5
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp | 5
-rw-r--r-- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 17
-rw-r--r-- src/video_core/renderer_vulkan/vk_fsr.cpp | 46
-rw-r--r-- src/video_core/renderer_vulkan/vk_fsr.h | 2
14 files changed, 197 insertions, 116 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 91a30fef7..07b94dcc8 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -237,6 +237,7 @@ target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
237target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) 237target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
238target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) 238target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
239 239
240target_link_libraries(video_core PRIVATE ffx-fsr)
240add_dependencies(video_core host_shaders) 241add_dependencies(video_core host_shaders)
241target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) 242target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
242target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) 243target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 6b5ea649a..d779a967a 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -18,16 +18,20 @@ set(SHADER_FILES
18 opengl_copy_bc4.comp 18 opengl_copy_bc4.comp
19 opengl_present.frag 19 opengl_present.frag
20 opengl_present.vert 20 opengl_present.vert
21 opengl_present_scaleforce.frag
21 pitch_unswizzle.comp 22 pitch_unswizzle.comp
22 present_scaleforce.frag
23 present_bicubic.frag 23 present_bicubic.frag
24 present_gaussian.frag 24 present_gaussian.frag
25 vulkan_blit_color_float.frag 25 vulkan_blit_color_float.frag
26 vulkan_blit_depth_stencil.frag 26 vulkan_blit_depth_stencil.frag
27 vulkan_fidelityfx_fsr_easu.comp 27 vulkan_fidelityfx_fsr_easu_fp16.comp
28 vulkan_fidelityfx_fsr_rcas.comp 28 vulkan_fidelityfx_fsr_easu_fp32.comp
29 vulkan_fidelityfx_fsr_rcas_fp16.comp
30 vulkan_fidelityfx_fsr_rcas_fp32.comp
29 vulkan_present.frag 31 vulkan_present.frag
30 vulkan_present.vert 32 vulkan_present.vert
33 vulkan_present_scaleforce_fp16.frag
34 vulkan_present_scaleforce_fp32.frag
31 vulkan_quad_indexed.comp 35 vulkan_quad_indexed.comp
32 vulkan_uint8.comp 36 vulkan_uint8.comp
33) 37)
diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp
index cbb601580..6b97f789d 100644
--- a/src/video_core/host_shaders/fidelityfx_fsr.comp
+++ b/src/video_core/host_shaders/fidelityfx_fsr.comp
@@ -28,80 +28,82 @@
28// THE SOFTWARE. 28// THE SOFTWARE.
29 29
30layout( push_constant ) uniform constants { 30layout( push_constant ) uniform constants {
31 u32vec2 input_size; 31 uvec4 Const0;
32 uvec4 Const1;
33 uvec4 Const2;
34 uvec4 Const3;
32}; 35};
33 36
34uvec4 Const0; 37layout(set=0,binding=0) uniform sampler2D InputTexture;
35uvec4 Const1; 38layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
36uvec4 Const2;
37uvec4 Const3;
38 39
39#define A_GPU 1 40#define A_GPU 1
40#define A_GLSL 1 41#define A_GLSL 1
41#define A_HALF
42 42
43#include "ffx_a.h" 43#ifndef YUZU_USE_FP16
44 #include "ffx_a.h"
44 45
45f16vec4 LinearToSRGB(f16vec4 linear) { 46 #if USE_EASU
46 bvec4 selector = greaterThan(linear, f16vec4(0.00313066844250063)); 47 #define FSR_EASU_F 1
47 f16vec4 low = linear * float16_t(12.92); 48 AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; }
48 f16vec4 high = float16_t(1.055) * pow(linear, f16vec4(1 / 2.4)) - float16_t(0.055); 49 AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; }
49 return mix(low, high, selector); 50 AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; }
50} 51 #endif
51 52 #if USE_RCAS
52f16vec4 SRGBToLinear(f16vec4 srgb) { 53 #define FSR_RCAS_F 1
53 bvec4 selector = greaterThan(srgb, f16vec4(0.0404482362771082)); 54 AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); }
54 f16vec4 low = srgb * float16_t(1.0 / 12.92); 55 void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
55 f16vec4 high = pow((srgb + float16_t(0.055)) * float16_t(1.0 / 1.055), f16vec4(2.4)); 56 #endif
56 return mix(low, high, selector); 57#else
57} 58 #define A_HALF
59 #include "ffx_a.h"
58 60
59#if USE_EASU 61 #if USE_EASU
60 #define FSR_EASU_H 1 62 #define FSR_EASU_H 1
61 f16vec4 FsrEasuRH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 0)); return res; } 63 AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; }
62 f16vec4 FsrEasuGH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 1)); return res; } 64 AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; }
63 f16vec4 FsrEasuBH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 2)); return res; } 65 AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; }
64#endif 66 #endif
65#if USE_RCAS 67 #if USE_RCAS
66 #define FSR_RCAS_H 1 68 #define FSR_RCAS_H 1
67 f16vec4 FsrRcasLoadH(ASW2 p) { return f16vec4(texelFetch(InputTexture, ASU2(p), 0)); } 69 AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); }
68 void FsrRcasInputH(inout float16_t r, inout float16_t g, inout float16_t b) {} 70 void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){}
71 #endif
69#endif 72#endif
70 73
71#include "ffx_fsr1.h" 74#include "ffx_fsr1.h"
72 75
73void CurrFilter(u32vec2 pos) { 76void CurrFilter(AU2 pos) {
74 // For debugging
75#if USE_BILINEAR 77#if USE_BILINEAR
76 vec2 pp = (vec2(pos) * vec2_AU2(Const0.xy) + vec2_AU2(Const0.zw)) * vec2_AU2(Const1.xy) + vec2(0.5, -0.5) * vec2_AU2(Const1.zw); 78 AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
77 imageStore(OutputTexture, ivec2(pos), textureLod(InputTexture, pp, 0.0)); 79 imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0));
78#endif 80#endif
79#if USE_EASU 81#if USE_EASU
80 f16vec3 c; 82 #ifndef YUZU_USE_FP16
81 FsrEasuH(c, pos, Const0, Const1, Const2, Const3); 83 AF3 c;
82 imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); 84 FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
85 imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
86 #else
87 AH3 c;
88 FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
89 imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
90 #endif
83#endif 91#endif
84#if USE_RCAS 92#if USE_RCAS
85 f16vec3 c; 93 #ifndef YUZU_USE_FP16
86 FsrRcasH(c.r, c.g, c.b, pos, Const0); 94 AF3 c;
87 imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); 95 FsrRcasF(c.r, c.g, c.b, pos, Const0);
96 imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
97 #else
98 AH3 c;
99 FsrRcasH(c.r, c.g, c.b, pos, Const0);
100 imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
101 #endif
88#endif 102#endif
89
90} 103}
91 104
92layout(local_size_x=64) in; 105layout(local_size_x=64) in;
93void main() { 106void main() {
94
95#if USE_EASU || USE_BILINEAR
96 vec2 ires = vec2(input_size);
97 vec2 tres = textureSize(InputTexture, 0);
98 vec2 ores = imageSize(OutputTexture);
99 FsrEasuCon(Const0, Const1, Const2, Const3, ires.x, ires.y, tres.x, tres.y, ores.x, ores.y);
100#endif
101#if USE_RCAS
102 FsrRcasCon(Const0, 0.25f);
103#endif
104
105 // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. 107 // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
106 AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); 108 AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);
107 CurrFilter(gxy); 109 CurrFilter(gxy);
diff --git a/src/video_core/host_shaders/present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag
index ebc0d9b90..71ff9e1e3 100644
--- a/src/video_core/host_shaders/present_scaleforce.frag
+++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag
@@ -22,11 +22,29 @@
22 22
23// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce 23// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce
24 24
25#version 460 25//! #version 460
26
27#extension GL_ARB_separate_shader_objects : enable
28
29#ifdef YUZU_USE_FP16
26 30
27#extension GL_AMD_gpu_shader_half_float : enable 31#extension GL_AMD_gpu_shader_half_float : enable
28#extension GL_NV_gpu_shader5 : enable 32#extension GL_NV_gpu_shader5 : enable
29 33
34#define lfloat float16_t
35#define lvec2 f16vec2
36#define lvec3 f16vec3
37#define lvec4 f16vec4
38
39#else
40
41#define lfloat float
42#define lvec2 vec2
43#define lvec3 vec3
44#define lvec4 vec4
45
46#endif
47
30#ifdef VULKAN 48#ifdef VULKAN
31 49
32#define BINDING_COLOR_TEXTURE 1 50#define BINDING_COLOR_TEXTURE 1
@@ -45,25 +63,25 @@ layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
45 63
46const bool ignore_alpha = true; 64const bool ignore_alpha = true;
47 65
48float16_t ColorDist1(f16vec4 a, f16vec4 b) { 66lfloat ColorDist1(lvec4 a, lvec4 b) {
49 // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion 67 // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion
50 const f16vec3 K = f16vec3(0.2627, 0.6780, 0.0593); 68 const lvec3 K = lvec3(0.2627, 0.6780, 0.0593);
51 const float16_t scaleB = float16_t(0.5) / (float16_t(1.0) - K.b); 69 const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b);
52 const float16_t scaleR = float16_t(0.5) / (float16_t(1.0) - K.r); 70 const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r);
53 f16vec4 diff = a - b; 71 lvec4 diff = a - b;
54 float16_t Y = dot(diff.rgb, K); 72 lfloat Y = dot(diff.rgb, K);
55 float16_t Cb = scaleB * (diff.b - Y); 73 lfloat Cb = scaleB * (diff.b - Y);
56 float16_t Cr = scaleR * (diff.r - Y); 74 lfloat Cr = scaleR * (diff.r - Y);
57 f16vec3 YCbCr = f16vec3(Y, Cb, Cr); 75 lvec3 YCbCr = lvec3(Y, Cb, Cr);
58 float16_t d = length(YCbCr); 76 lfloat d = length(YCbCr);
59 if (ignore_alpha) { 77 if (ignore_alpha) {
60 return d; 78 return d;
61 } 79 }
62 return sqrt(a.a * b.a * d * d + diff.a * diff.a); 80 return sqrt(a.a * b.a * d * d + diff.a * diff.a);
63} 81}
64 82
65f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) { 83lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) {
66 return f16vec4( 84 return lvec4(
67 ColorDist1(ref, A), 85 ColorDist1(ref, A),
68 ColorDist1(ref, B), 86 ColorDist1(ref, B),
69 ColorDist1(ref, C), 87 ColorDist1(ref, C),
@@ -72,36 +90,36 @@ f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) {
72} 90}
73 91
74vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { 92vec4 Scaleforce(sampler2D tex, vec2 tex_coord) {
75 f16vec4 bl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); 93 lvec4 bl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1)));
76 f16vec4 bc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, -1))); 94 lvec4 bc = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1)));
77 f16vec4 br = f16vec4(textureOffset(tex, tex_coord, ivec2(1, -1))); 95 lvec4 br = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1)));
78 f16vec4 cl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); 96 lvec4 cl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0)));
79 f16vec4 cc = f16vec4(texture(tex, tex_coord)); 97 lvec4 cc = lvec4(texture(tex, tex_coord));
80 f16vec4 cr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 0))); 98 lvec4 cr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0)));
81 f16vec4 tl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); 99 lvec4 tl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1)));
82 f16vec4 tc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, 1))); 100 lvec4 tc = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1)));
83 f16vec4 tr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 1))); 101 lvec4 tr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1)));
84 102
85 f16vec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); 103 lvec4 offset_tl = ColorDist(cc, tl, tc, tr, cr);
86 f16vec4 offset_br = ColorDist(cc, br, bc, bl, cl); 104 lvec4 offset_br = ColorDist(cc, br, bc, bl, cl);
87 105
88 // Calculate how different cc is from the texels around it 106 // Calculate how different cc is from the texels around it
89 const float16_t plus_weight = float16_t(1.5); 107 const lfloat plus_weight = lfloat(1.5);
90 const float16_t cross_weight = float16_t(1.5); 108 const lfloat cross_weight = lfloat(1.5);
91 float16_t total_dist = dot(offset_tl + offset_br, f16vec4(cross_weight, plus_weight, cross_weight, plus_weight)); 109 lfloat total_dist = dot(offset_tl + offset_br, lvec4(cross_weight, plus_weight, cross_weight, plus_weight));
92 110
93 if (total_dist == float16_t(0.0)) { 111 if (total_dist == lfloat(0.0)) {
94 return cc; 112 return cc;
95 } else { 113 } else {
96 // Add together all the distances with direction taken into account 114 // Add together all the distances with direction taken into account
97 f16vec4 tmp = offset_tl - offset_br; 115 lvec4 tmp = offset_tl - offset_br;
98 f16vec2 total_offset = tmp.wy * plus_weight + (tmp.zz + f16vec2(-tmp.x, tmp.x)) * cross_weight; 116 lvec2 total_offset = tmp.wy * plus_weight + (tmp.zz + lvec2(-tmp.x, tmp.x)) * cross_weight;
99 117
100 // When the image has thin points, they tend to split apart. 118 // When the image has thin points, they tend to split apart.
101 // This is because the texels all around are different and total_offset reaches into clear areas. 119 // This is because the texels all around are different and total_offset reaches into clear areas.
102 // This works pretty well to keep the offset in bounds for these cases. 120 // This works pretty well to keep the offset in bounds for these cases.
103 float16_t clamp_val = length(total_offset) / total_dist; 121 lfloat clamp_val = length(total_offset) / total_dist;
104 f16vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) / f16vec2(textureSize(tex, 0)); 122 vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0);
105 123
106 return texture(tex, tex_coord - final_offset); 124 return texture(tex, tex_coord - final_offset);
107 } 125 }
@@ -109,4 +127,4 @@ vec4 Scaleforce(sampler2D tex, vec2 tex_coord) {
109 127
110void main() { 128void main() {
111 frag_color = Scaleforce(input_texture, tex_coord); 129 frag_color = Scaleforce(input_texture, tex_coord);
112} \ No newline at end of file 130}
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
index 6525eeeb5..1c96a7905 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
@@ -5,9 +5,7 @@
5#version 460 core 5#version 460 core
6#extension GL_GOOGLE_include_directive : enable 6#extension GL_GOOGLE_include_directive : enable
7 7
8layout(set=0,binding=0) uniform sampler2D InputTexture; 8#define YUZU_USE_FP16
9layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
10
11#define USE_EASU 1 9#define USE_EASU 1
12 10
13#include "fidelityfx_fsr.comp" 11#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
new file mode 100644
index 000000000..f4daff739
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
@@ -0,0 +1,10 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 460 core
6#extension GL_GOOGLE_include_directive : enable
7
8#define USE_EASU 1
9
10#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
index 9463ed842..6b6796dd1 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
@@ -5,9 +5,7 @@
5#version 460 core 5#version 460 core
6#extension GL_GOOGLE_include_directive : enable 6#extension GL_GOOGLE_include_directive : enable
7 7
8layout(set=0,binding=0) uniform sampler2D InputTexture; 8#define YUZU_USE_FP16
9layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
10
11#define USE_RCAS 1 9#define USE_RCAS 1
12 10
13#include "fidelityfx_fsr.comp" 11#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
new file mode 100644
index 000000000..f785eebf3
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
@@ -0,0 +1,10 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 460 core
6#extension GL_GOOGLE_include_directive : enable
7
8#define USE_RCAS 1
9
10#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
new file mode 100644
index 000000000..924c03060
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
@@ -0,0 +1,7 @@
1#version 460
2
3#extension GL_GOOGLE_include_directive : enable
4
5#define YUZU_USE_FP16
6
7#include "opengl_present_scaleforce.frag"
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
new file mode 100644
index 000000000..a594b83ca
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
@@ -0,0 +1,5 @@
1#version 460
2
3#extension GL_GOOGLE_include_directive : enable
4
5#include "opengl_present_scaleforce.frag"
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e63f0bdd8..28daacd82 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -24,10 +24,10 @@
24#include "video_core/host_shaders/fxaa_frag.h" 24#include "video_core/host_shaders/fxaa_frag.h"
25#include "video_core/host_shaders/fxaa_vert.h" 25#include "video_core/host_shaders/fxaa_vert.h"
26#include "video_core/host_shaders/opengl_present_frag.h" 26#include "video_core/host_shaders/opengl_present_frag.h"
27#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
27#include "video_core/host_shaders/opengl_present_vert.h" 28#include "video_core/host_shaders/opengl_present_vert.h"
28#include "video_core/host_shaders/present_bicubic_frag.h" 29#include "video_core/host_shaders/present_bicubic_frag.h"
29#include "video_core/host_shaders/present_gaussian_frag.h" 30#include "video_core/host_shaders/present_gaussian_frag.h"
30#include "video_core/host_shaders/present_scaleforce_frag.h"
31#include "video_core/renderer_opengl/gl_rasterizer.h" 31#include "video_core/renderer_opengl/gl_rasterizer.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 32#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_shader_util.h" 33#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -266,7 +266,8 @@ void RendererOpenGL::InitOpenGLObjects() {
266 present_gaussian_fragment = 266 present_gaussian_fragment =
267 CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); 267 CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER);
268 present_scaleforce_fragment = 268 present_scaleforce_fragment =
269 CreateProgram(HostShaders::PRESENT_SCALEFORCE_FRAG, GL_FRAGMENT_SHADER); 269 CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),
270 GL_FRAGMENT_SHADER);
270 271
271 // Generate presentation sampler 272 // Generate presentation sampler
272 present_sampler.Create(); 273 present_sampler.Create();
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 2bed4f3c5..9dfc508bc 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -21,8 +21,9 @@
21#include "video_core/host_shaders/fxaa_vert_spv.h" 21#include "video_core/host_shaders/fxaa_vert_spv.h"
22#include "video_core/host_shaders/present_bicubic_frag_spv.h" 22#include "video_core/host_shaders/present_bicubic_frag_spv.h"
23#include "video_core/host_shaders/present_gaussian_frag_spv.h" 23#include "video_core/host_shaders/present_gaussian_frag_spv.h"
24#include "video_core/host_shaders/present_scaleforce_frag_spv.h"
25#include "video_core/host_shaders/vulkan_present_frag_spv.h" 24#include "video_core/host_shaders/vulkan_present_frag_spv.h"
25#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h"
26#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h"
26#include "video_core/host_shaders/vulkan_present_vert_spv.h" 27#include "video_core/host_shaders/vulkan_present_vert_spv.h"
27#include "video_core/renderer_vulkan/renderer_vulkan.h" 28#include "video_core/renderer_vulkan/renderer_vulkan.h"
28#include "video_core/renderer_vulkan/vk_blit_screen.h" 29#include "video_core/renderer_vulkan/vk_blit_screen.h"
@@ -328,7 +329,7 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
328 blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 329 blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
329 blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; 330 blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
330 331
331 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT , 332 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
332 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier); 333 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier);
333 } 334 }
334 }); 335 });
@@ -344,8 +345,12 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
344 crop_rect.bottom = framebuffer.height; 345 crop_rect.bottom = framebuffer.height;
345 } 346 }
346 crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); 347 crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
348 VkExtent2D fsr_input_size{
349 .width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
350 .height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
351 };
347 VkImageView fsr_image_view = 352 VkImageView fsr_image_view =
348 fsr->Draw(scheduler, image_index, source_image_view, crop_rect); 353 fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
349 UpdateDescriptorSet(image_index, fsr_image_view, true); 354 UpdateDescriptorSet(image_index, fsr_image_view, true);
350 } else { 355 } else {
351 const bool is_nn = 356 const bool is_nn =
@@ -500,7 +505,11 @@ void VKBlitScreen::CreateShaders() {
500 bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); 505 bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
501 bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); 506 bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV);
502 gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); 507 gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV);
503 scaleforce_fragment_shader = BuildShader(device, PRESENT_SCALEFORCE_FRAG_SPV); 508 if (device.IsFloat16Supported()) {
509 scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV);
510 } else {
511 scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV);
512 }
504} 513}
505 514
506void VKBlitScreen::CreateSemaphores() { 515void VKBlitScreen::CreateSemaphores() {
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index 1f60974be..9288aa7c2 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -4,13 +4,19 @@
4 4
5#include "common/common_types.h" 5#include "common/common_types.h"
6#include "common/div_ceil.h" 6#include "common/div_ceil.h"
7#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_comp_spv.h" 7#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
8#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_comp_spv.h" 8#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
9#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
10#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h"
9#include "video_core/renderer_vulkan/vk_fsr.h" 11#include "video_core/renderer_vulkan/vk_fsr.h"
10#include "video_core/renderer_vulkan/vk_scheduler.h" 12#include "video_core/renderer_vulkan/vk_scheduler.h"
11#include "video_core/renderer_vulkan/vk_shader_util.h" 13#include "video_core/renderer_vulkan/vk_shader_util.h"
12#include "video_core/vulkan_common/vulkan_device.h" 14#include "video_core/vulkan_common/vulkan_device.h"
13 15
16#define A_CPU
17#include <ffx_a.h>
18#include <ffx_fsr1.h>
19
14namespace Vulkan { 20namespace Vulkan {
15 21
16FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, 22FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,
@@ -29,11 +35,11 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image
29} 35}
30 36
31VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, 37VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
32 const Common::Rectangle<int>& crop_rect) { 38 VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {
33 39
34 UpdateDescriptorSet(image_index, image_view); 40 UpdateDescriptorSet(image_index, image_view);
35 41
36 scheduler.Record([this, image_index, crop_rect](vk::CommandBuffer cmdbuf) { 42 scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) {
37 const VkImageMemoryBarrier base_barrier{ 43 const VkImageMemoryBarrier base_barrier{
38 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 44 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
39 .pNext = nullptr, 45 .pNext = nullptr,
@@ -54,13 +60,18 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im
54 }, 60 },
55 }; 61 };
56 62
57 // TODO: Support clear color
58 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); 63 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
59 cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 64
60 VkExtent2D{ 65 std::array<AU1, 4 * 4> push_constants;
61 .width = static_cast<u32>(crop_rect.GetWidth()), 66 FsrEasuConOffset(
62 .height = static_cast<u32>(crop_rect.GetHeight()), 67 push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
63 }); 68 push_constants.data() + 12,
69
70 static_cast<AF1>(crop_rect.GetWidth()), static_cast<AF1>(crop_rect.GetHeight()),
71 static_cast<AF1>(input_image_extent.width), static_cast<AF1>(input_image_extent.height),
72 static_cast<AF1>(output_size.width), static_cast<AF1>(output_size.height),
73 static_cast<AF1>(crop_rect.left), static_cast<AF1>(crop_rect.top));
74 cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
64 75
65 { 76 {
66 VkImageMemoryBarrier fsr_write_barrier = base_barrier; 77 VkImageMemoryBarrier fsr_write_barrier = base_barrier;
@@ -77,7 +88,9 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im
77 Common::DivCeil(output_size.height, 16u), 1); 88 Common::DivCeil(output_size.height, 16u), 1);
78 89
79 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline); 90 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline);
80 cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, output_size); 91
92 FsrRcasCon(push_constants.data(), 0.25f);
93 cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
81 94
82 { 95 {
83 std::array<VkImageMemoryBarrier, 2> barriers; 96 std::array<VkImageMemoryBarrier, 2> barriers;
@@ -247,7 +260,7 @@ void FSR::CreatePipelineLayout() {
247 VkPushConstantRange push_const{ 260 VkPushConstantRange push_const{
248 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 261 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
249 .offset = 0, 262 .offset = 0,
250 .size = sizeof(std::array<u32, 2>), 263 .size = sizeof(std::array<u32, 4 * 4>),
251 }; 264 };
252 VkPipelineLayoutCreateInfo ci{ 265 VkPipelineLayoutCreateInfo ci{
253 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 266 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
@@ -344,8 +357,13 @@ void FSR::CreateSampler() {
344} 357}
345 358
346void FSR::CreateShaders() { 359void FSR::CreateShaders() {
347 easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_COMP_SPV); 360 if (device.IsFloat16Supported()) {
348 rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_COMP_SPV); 361 easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV);
362 rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV);
363 } else {
364 easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV);
365 rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV);
366 }
349} 367}
350 368
351void FSR::CreatePipeline() { 369void FSR::CreatePipeline() {
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h
index 8391e2e58..6bbec3d36 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.h
+++ b/src/video_core/renderer_vulkan/vk_fsr.h
@@ -18,7 +18,7 @@ public:
18 explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, 18 explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
19 VkExtent2D output_size); 19 VkExtent2D output_size);
20 VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, 20 VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
21 const Common::Rectangle<int>& crop_rect); 21 VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
22 22
23private: 23private:
24 void CreateDescriptorPool(); 24 void CreateDescriptorPool();