diff options
| author | 2021-10-22 23:09:29 -0500 | |
|---|---|---|
| committer | 2021-11-16 22:11:32 +0100 | |
| commit | dcc5b4f6b005a2c89bb4e77bca4cfe8705734021 (patch) | |
| tree | 8f18cc5669dfcd150e3b97eca7acb91a0738b4ba /src | |
| parent | renderer_vulkan/blit_image: Use generic color state on Depth to Color blits (diff) | |
| download | yuzu-dcc5b4f6b005a2c89bb4e77bca4cfe8705734021.tar.gz yuzu-dcc5b4f6b005a2c89bb4e77bca4cfe8705734021.tar.xz yuzu-dcc5b4f6b005a2c89bb4e77bca4cfe8705734021.zip | |
Presentation: Only use FP16 in scaling shaders on supported devices in Vulkan
Diffstat (limited to 'src')
14 files changed, 197 insertions, 116 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 91a30fef7..07b94dcc8 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -237,6 +237,7 @@ target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR}) | |||
| 237 | target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) | 237 | target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) |
| 238 | target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) | 238 | target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) |
| 239 | 239 | ||
| 240 | target_link_libraries(video_core PRIVATE ffx-fsr) | ||
| 240 | add_dependencies(video_core host_shaders) | 241 | add_dependencies(video_core host_shaders) |
| 241 | target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) | 242 | target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) |
| 242 | target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) | 243 | target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) |
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 6b5ea649a..d779a967a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -18,16 +18,20 @@ set(SHADER_FILES | |||
| 18 | opengl_copy_bc4.comp | 18 | opengl_copy_bc4.comp |
| 19 | opengl_present.frag | 19 | opengl_present.frag |
| 20 | opengl_present.vert | 20 | opengl_present.vert |
| 21 | opengl_present_scaleforce.frag | ||
| 21 | pitch_unswizzle.comp | 22 | pitch_unswizzle.comp |
| 22 | present_scaleforce.frag | ||
| 23 | present_bicubic.frag | 23 | present_bicubic.frag |
| 24 | present_gaussian.frag | 24 | present_gaussian.frag |
| 25 | vulkan_blit_color_float.frag | 25 | vulkan_blit_color_float.frag |
| 26 | vulkan_blit_depth_stencil.frag | 26 | vulkan_blit_depth_stencil.frag |
| 27 | vulkan_fidelityfx_fsr_easu.comp | 27 | vulkan_fidelityfx_fsr_easu_fp16.comp |
| 28 | vulkan_fidelityfx_fsr_rcas.comp | 28 | vulkan_fidelityfx_fsr_easu_fp32.comp |
| 29 | vulkan_fidelityfx_fsr_rcas_fp16.comp | ||
| 30 | vulkan_fidelityfx_fsr_rcas_fp32.comp | ||
| 29 | vulkan_present.frag | 31 | vulkan_present.frag |
| 30 | vulkan_present.vert | 32 | vulkan_present.vert |
| 33 | vulkan_present_scaleforce_fp16.frag | ||
| 34 | vulkan_present_scaleforce_fp32.frag | ||
| 31 | vulkan_quad_indexed.comp | 35 | vulkan_quad_indexed.comp |
| 32 | vulkan_uint8.comp | 36 | vulkan_uint8.comp |
| 33 | ) | 37 | ) |
diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp index cbb601580..6b97f789d 100644 --- a/src/video_core/host_shaders/fidelityfx_fsr.comp +++ b/src/video_core/host_shaders/fidelityfx_fsr.comp | |||
| @@ -28,80 +28,82 @@ | |||
| 28 | // THE SOFTWARE. | 28 | // THE SOFTWARE. |
| 29 | 29 | ||
| 30 | layout( push_constant ) uniform constants { | 30 | layout( push_constant ) uniform constants { |
| 31 | u32vec2 input_size; | 31 | uvec4 Const0; |
| 32 | uvec4 Const1; | ||
| 33 | uvec4 Const2; | ||
| 34 | uvec4 Const3; | ||
| 32 | }; | 35 | }; |
| 33 | 36 | ||
| 34 | uvec4 Const0; | 37 | layout(set=0,binding=0) uniform sampler2D InputTexture; |
| 35 | uvec4 Const1; | 38 | layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; |
| 36 | uvec4 Const2; | ||
| 37 | uvec4 Const3; | ||
| 38 | 39 | ||
| 39 | #define A_GPU 1 | 40 | #define A_GPU 1 |
| 40 | #define A_GLSL 1 | 41 | #define A_GLSL 1 |
| 41 | #define A_HALF | ||
| 42 | 42 | ||
| 43 | #include "ffx_a.h" | 43 | #ifndef YUZU_USE_FP16 |
| 44 | #include "ffx_a.h" | ||
| 44 | 45 | ||
| 45 | f16vec4 LinearToSRGB(f16vec4 linear) { | 46 | #if USE_EASU |
| 46 | bvec4 selector = greaterThan(linear, f16vec4(0.00313066844250063)); | 47 | #define FSR_EASU_F 1 |
| 47 | f16vec4 low = linear * float16_t(12.92); | 48 | AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; } |
| 48 | f16vec4 high = float16_t(1.055) * pow(linear, f16vec4(1 / 2.4)) - float16_t(0.055); | 49 | AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; } |
| 49 | return mix(low, high, selector); | 50 | AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; } |
| 50 | } | 51 | #endif |
| 51 | 52 | #if USE_RCAS | |
| 52 | f16vec4 SRGBToLinear(f16vec4 srgb) { | 53 | #define FSR_RCAS_F 1 |
| 53 | bvec4 selector = greaterThan(srgb, f16vec4(0.0404482362771082)); | 54 | AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); } |
| 54 | f16vec4 low = srgb * float16_t(1.0 / 12.92); | 55 | void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} |
| 55 | f16vec4 high = pow((srgb + float16_t(0.055)) * float16_t(1.0 / 1.055), f16vec4(2.4)); | 56 | #endif |
| 56 | return mix(low, high, selector); | 57 | #else |
| 57 | } | 58 | #define A_HALF |
| 59 | #include "ffx_a.h" | ||
| 58 | 60 | ||
| 59 | #if USE_EASU | 61 | #if USE_EASU |
| 60 | #define FSR_EASU_H 1 | 62 | #define FSR_EASU_H 1 |
| 61 | f16vec4 FsrEasuRH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 0)); return res; } | 63 | AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; } |
| 62 | f16vec4 FsrEasuGH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 1)); return res; } | 64 | AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; } |
| 63 | f16vec4 FsrEasuBH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 2)); return res; } | 65 | AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; } |
| 64 | #endif | 66 | #endif |
| 65 | #if USE_RCAS | 67 | #if USE_RCAS |
| 66 | #define FSR_RCAS_H 1 | 68 | #define FSR_RCAS_H 1 |
| 67 | f16vec4 FsrRcasLoadH(ASW2 p) { return f16vec4(texelFetch(InputTexture, ASU2(p), 0)); } | 69 | AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); } |
| 68 | void FsrRcasInputH(inout float16_t r, inout float16_t g, inout float16_t b) {} | 70 | void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} |
| 71 | #endif | ||
| 69 | #endif | 72 | #endif |
| 70 | 73 | ||
| 71 | #include "ffx_fsr1.h" | 74 | #include "ffx_fsr1.h" |
| 72 | 75 | ||
| 73 | void CurrFilter(u32vec2 pos) { | 76 | void CurrFilter(AU2 pos) { |
| 74 | // For debugging | ||
| 75 | #if USE_BILINEAR | 77 | #if USE_BILINEAR |
| 76 | vec2 pp = (vec2(pos) * vec2_AU2(Const0.xy) + vec2_AU2(Const0.zw)) * vec2_AU2(Const1.xy) + vec2(0.5, -0.5) * vec2_AU2(Const1.zw); | 78 | AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw); |
| 77 | imageStore(OutputTexture, ivec2(pos), textureLod(InputTexture, pp, 0.0)); | 79 | imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0)); |
| 78 | #endif | 80 | #endif |
| 79 | #if USE_EASU | 81 | #if USE_EASU |
| 80 | f16vec3 c; | 82 | #ifndef YUZU_USE_FP16 |
| 81 | FsrEasuH(c, pos, Const0, Const1, Const2, Const3); | 83 | AF3 c; |
| 82 | imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); | 84 | FsrEasuF(c, pos, Const0, Const1, Const2, Const3); |
| 85 | imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); | ||
| 86 | #else | ||
| 87 | AH3 c; | ||
| 88 | FsrEasuH(c, pos, Const0, Const1, Const2, Const3); | ||
| 89 | imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); | ||
| 90 | #endif | ||
| 83 | #endif | 91 | #endif |
| 84 | #if USE_RCAS | 92 | #if USE_RCAS |
| 85 | f16vec3 c; | 93 | #ifndef YUZU_USE_FP16 |
| 86 | FsrRcasH(c.r, c.g, c.b, pos, Const0); | 94 | AF3 c; |
| 87 | imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); | 95 | FsrRcasF(c.r, c.g, c.b, pos, Const0); |
| 96 | imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); | ||
| 97 | #else | ||
| 98 | AH3 c; | ||
| 99 | FsrRcasH(c.r, c.g, c.b, pos, Const0); | ||
| 100 | imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); | ||
| 101 | #endif | ||
| 88 | #endif | 102 | #endif |
| 89 | |||
| 90 | } | 103 | } |
| 91 | 104 | ||
| 92 | layout(local_size_x=64) in; | 105 | layout(local_size_x=64) in; |
| 93 | void main() { | 106 | void main() { |
| 94 | |||
| 95 | #if USE_EASU || USE_BILINEAR | ||
| 96 | vec2 ires = vec2(input_size); | ||
| 97 | vec2 tres = textureSize(InputTexture, 0); | ||
| 98 | vec2 ores = imageSize(OutputTexture); | ||
| 99 | FsrEasuCon(Const0, Const1, Const2, Const3, ires.x, ires.y, tres.x, tres.y, ores.x, ores.y); | ||
| 100 | #endif | ||
| 101 | #if USE_RCAS | ||
| 102 | FsrRcasCon(Const0, 0.25f); | ||
| 103 | #endif | ||
| 104 | |||
| 105 | // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. | 107 | // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. |
| 106 | AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); | 108 | AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); |
| 107 | CurrFilter(gxy); | 109 | CurrFilter(gxy); |
diff --git a/src/video_core/host_shaders/present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag index ebc0d9b90..71ff9e1e3 100644 --- a/src/video_core/host_shaders/present_scaleforce.frag +++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag | |||
| @@ -22,11 +22,29 @@ | |||
| 22 | 22 | ||
| 23 | // Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce | 23 | // Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce |
| 24 | 24 | ||
| 25 | #version 460 | 25 | //! #version 460 |
| 26 | |||
| 27 | #extension GL_ARB_separate_shader_objects : enable | ||
| 28 | |||
| 29 | #ifdef YUZU_USE_FP16 | ||
| 26 | 30 | ||
| 27 | #extension GL_AMD_gpu_shader_half_float : enable | 31 | #extension GL_AMD_gpu_shader_half_float : enable |
| 28 | #extension GL_NV_gpu_shader5 : enable | 32 | #extension GL_NV_gpu_shader5 : enable |
| 29 | 33 | ||
| 34 | #define lfloat float16_t | ||
| 35 | #define lvec2 f16vec2 | ||
| 36 | #define lvec3 f16vec3 | ||
| 37 | #define lvec4 f16vec4 | ||
| 38 | |||
| 39 | #else | ||
| 40 | |||
| 41 | #define lfloat float | ||
| 42 | #define lvec2 vec2 | ||
| 43 | #define lvec3 vec3 | ||
| 44 | #define lvec4 vec4 | ||
| 45 | |||
| 46 | #endif | ||
| 47 | |||
| 30 | #ifdef VULKAN | 48 | #ifdef VULKAN |
| 31 | 49 | ||
| 32 | #define BINDING_COLOR_TEXTURE 1 | 50 | #define BINDING_COLOR_TEXTURE 1 |
| @@ -45,25 +63,25 @@ layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; | |||
| 45 | 63 | ||
| 46 | const bool ignore_alpha = true; | 64 | const bool ignore_alpha = true; |
| 47 | 65 | ||
| 48 | float16_t ColorDist1(f16vec4 a, f16vec4 b) { | 66 | lfloat ColorDist1(lvec4 a, lvec4 b) { |
| 49 | // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion | 67 | // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion |
| 50 | const f16vec3 K = f16vec3(0.2627, 0.6780, 0.0593); | 68 | const lvec3 K = lvec3(0.2627, 0.6780, 0.0593); |
| 51 | const float16_t scaleB = float16_t(0.5) / (float16_t(1.0) - K.b); | 69 | const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b); |
| 52 | const float16_t scaleR = float16_t(0.5) / (float16_t(1.0) - K.r); | 70 | const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r); |
| 53 | f16vec4 diff = a - b; | 71 | lvec4 diff = a - b; |
| 54 | float16_t Y = dot(diff.rgb, K); | 72 | lfloat Y = dot(diff.rgb, K); |
| 55 | float16_t Cb = scaleB * (diff.b - Y); | 73 | lfloat Cb = scaleB * (diff.b - Y); |
| 56 | float16_t Cr = scaleR * (diff.r - Y); | 74 | lfloat Cr = scaleR * (diff.r - Y); |
| 57 | f16vec3 YCbCr = f16vec3(Y, Cb, Cr); | 75 | lvec3 YCbCr = lvec3(Y, Cb, Cr); |
| 58 | float16_t d = length(YCbCr); | 76 | lfloat d = length(YCbCr); |
| 59 | if (ignore_alpha) { | 77 | if (ignore_alpha) { |
| 60 | return d; | 78 | return d; |
| 61 | } | 79 | } |
| 62 | return sqrt(a.a * b.a * d * d + diff.a * diff.a); | 80 | return sqrt(a.a * b.a * d * d + diff.a * diff.a); |
| 63 | } | 81 | } |
| 64 | 82 | ||
| 65 | f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) { | 83 | lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) { |
| 66 | return f16vec4( | 84 | return lvec4( |
| 67 | ColorDist1(ref, A), | 85 | ColorDist1(ref, A), |
| 68 | ColorDist1(ref, B), | 86 | ColorDist1(ref, B), |
| 69 | ColorDist1(ref, C), | 87 | ColorDist1(ref, C), |
| @@ -72,36 +90,36 @@ f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) { | |||
| 72 | } | 90 | } |
| 73 | 91 | ||
| 74 | vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { | 92 | vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { |
| 75 | f16vec4 bl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); | 93 | lvec4 bl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); |
| 76 | f16vec4 bc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, -1))); | 94 | lvec4 bc = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1))); |
| 77 | f16vec4 br = f16vec4(textureOffset(tex, tex_coord, ivec2(1, -1))); | 95 | lvec4 br = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1))); |
| 78 | f16vec4 cl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); | 96 | lvec4 cl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); |
| 79 | f16vec4 cc = f16vec4(texture(tex, tex_coord)); | 97 | lvec4 cc = lvec4(texture(tex, tex_coord)); |
| 80 | f16vec4 cr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 0))); | 98 | lvec4 cr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0))); |
| 81 | f16vec4 tl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); | 99 | lvec4 tl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); |
| 82 | f16vec4 tc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, 1))); | 100 | lvec4 tc = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1))); |
| 83 | f16vec4 tr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 1))); | 101 | lvec4 tr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1))); |
| 84 | 102 | ||
| 85 | f16vec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); | 103 | lvec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); |
| 86 | f16vec4 offset_br = ColorDist(cc, br, bc, bl, cl); | 104 | lvec4 offset_br = ColorDist(cc, br, bc, bl, cl); |
| 87 | 105 | ||
| 88 | // Calculate how different cc is from the texels around it | 106 | // Calculate how different cc is from the texels around it |
| 89 | const float16_t plus_weight = float16_t(1.5); | 107 | const lfloat plus_weight = lfloat(1.5); |
| 90 | const float16_t cross_weight = float16_t(1.5); | 108 | const lfloat cross_weight = lfloat(1.5); |
| 91 | float16_t total_dist = dot(offset_tl + offset_br, f16vec4(cross_weight, plus_weight, cross_weight, plus_weight)); | 109 | lfloat total_dist = dot(offset_tl + offset_br, lvec4(cross_weight, plus_weight, cross_weight, plus_weight)); |
| 92 | 110 | ||
| 93 | if (total_dist == float16_t(0.0)) { | 111 | if (total_dist == lfloat(0.0)) { |
| 94 | return cc; | 112 | return cc; |
| 95 | } else { | 113 | } else { |
| 96 | // Add together all the distances with direction taken into account | 114 | // Add together all the distances with direction taken into account |
| 97 | f16vec4 tmp = offset_tl - offset_br; | 115 | lvec4 tmp = offset_tl - offset_br; |
| 98 | f16vec2 total_offset = tmp.wy * plus_weight + (tmp.zz + f16vec2(-tmp.x, tmp.x)) * cross_weight; | 116 | lvec2 total_offset = tmp.wy * plus_weight + (tmp.zz + lvec2(-tmp.x, tmp.x)) * cross_weight; |
| 99 | 117 | ||
| 100 | // When the image has thin points, they tend to split apart. | 118 | // When the image has thin points, they tend to split apart. |
| 101 | // This is because the texels all around are different and total_offset reaches into clear areas. | 119 | // This is because the texels all around are different and total_offset reaches into clear areas. |
| 102 | // This works pretty well to keep the offset in bounds for these cases. | 120 | // This works pretty well to keep the offset in bounds for these cases. |
| 103 | float16_t clamp_val = length(total_offset) / total_dist; | 121 | lfloat clamp_val = length(total_offset) / total_dist; |
| 104 | f16vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) / f16vec2(textureSize(tex, 0)); | 122 | vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0); |
| 105 | 123 | ||
| 106 | return texture(tex, tex_coord - final_offset); | 124 | return texture(tex, tex_coord - final_offset); |
| 107 | } | 125 | } |
| @@ -109,4 +127,4 @@ vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { | |||
| 109 | 127 | ||
| 110 | void main() { | 128 | void main() { |
| 111 | frag_color = Scaleforce(input_texture, tex_coord); | 129 | frag_color = Scaleforce(input_texture, tex_coord); |
| 112 | } \ No newline at end of file | 130 | } |
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp index 6525eeeb5..1c96a7905 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp | |||
| @@ -5,9 +5,7 @@ | |||
| 5 | #version 460 core | 5 | #version 460 core |
| 6 | #extension GL_GOOGLE_include_directive : enable | 6 | #extension GL_GOOGLE_include_directive : enable |
| 7 | 7 | ||
| 8 | layout(set=0,binding=0) uniform sampler2D InputTexture; | 8 | #define YUZU_USE_FP16 |
| 9 | layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; | ||
| 10 | |||
| 11 | #define USE_EASU 1 | 9 | #define USE_EASU 1 |
| 12 | 10 | ||
| 13 | #include "fidelityfx_fsr.comp" | 11 | #include "fidelityfx_fsr.comp" |
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp new file mode 100644 index 000000000..f4daff739 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 460 core | ||
| 6 | #extension GL_GOOGLE_include_directive : enable | ||
| 7 | |||
| 8 | #define USE_EASU 1 | ||
| 9 | |||
| 10 | #include "fidelityfx_fsr.comp" | ||
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp index 9463ed842..6b6796dd1 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp | |||
| @@ -5,9 +5,7 @@ | |||
| 5 | #version 460 core | 5 | #version 460 core |
| 6 | #extension GL_GOOGLE_include_directive : enable | 6 | #extension GL_GOOGLE_include_directive : enable |
| 7 | 7 | ||
| 8 | layout(set=0,binding=0) uniform sampler2D InputTexture; | 8 | #define YUZU_USE_FP16 |
| 9 | layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; | ||
| 10 | |||
| 11 | #define USE_RCAS 1 | 9 | #define USE_RCAS 1 |
| 12 | 10 | ||
| 13 | #include "fidelityfx_fsr.comp" | 11 | #include "fidelityfx_fsr.comp" |
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp new file mode 100644 index 000000000..f785eebf3 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 460 core | ||
| 6 | #extension GL_GOOGLE_include_directive : enable | ||
| 7 | |||
| 8 | #define USE_RCAS 1 | ||
| 9 | |||
| 10 | #include "fidelityfx_fsr.comp" | ||
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag new file mode 100644 index 000000000..924c03060 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | #version 460 | ||
| 2 | |||
| 3 | #extension GL_GOOGLE_include_directive : enable | ||
| 4 | |||
| 5 | #define YUZU_USE_FP16 | ||
| 6 | |||
| 7 | #include "opengl_present_scaleforce.frag" | ||
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag new file mode 100644 index 000000000..a594b83ca --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | #version 460 | ||
| 2 | |||
| 3 | #extension GL_GOOGLE_include_directive : enable | ||
| 4 | |||
| 5 | #include "opengl_present_scaleforce.frag" | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index e63f0bdd8..28daacd82 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -24,10 +24,10 @@ | |||
| 24 | #include "video_core/host_shaders/fxaa_frag.h" | 24 | #include "video_core/host_shaders/fxaa_frag.h" |
| 25 | #include "video_core/host_shaders/fxaa_vert.h" | 25 | #include "video_core/host_shaders/fxaa_vert.h" |
| 26 | #include "video_core/host_shaders/opengl_present_frag.h" | 26 | #include "video_core/host_shaders/opengl_present_frag.h" |
| 27 | #include "video_core/host_shaders/opengl_present_scaleforce_frag.h" | ||
| 27 | #include "video_core/host_shaders/opengl_present_vert.h" | 28 | #include "video_core/host_shaders/opengl_present_vert.h" |
| 28 | #include "video_core/host_shaders/present_bicubic_frag.h" | 29 | #include "video_core/host_shaders/present_bicubic_frag.h" |
| 29 | #include "video_core/host_shaders/present_gaussian_frag.h" | 30 | #include "video_core/host_shaders/present_gaussian_frag.h" |
| 30 | #include "video_core/host_shaders/present_scaleforce_frag.h" | ||
| 31 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 31 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 33 | #include "video_core/renderer_opengl/gl_shader_util.h" | 33 | #include "video_core/renderer_opengl/gl_shader_util.h" |
| @@ -266,7 +266,8 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 266 | present_gaussian_fragment = | 266 | present_gaussian_fragment = |
| 267 | CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); | 267 | CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); |
| 268 | present_scaleforce_fragment = | 268 | present_scaleforce_fragment = |
| 269 | CreateProgram(HostShaders::PRESENT_SCALEFORCE_FRAG, GL_FRAGMENT_SHADER); | 269 | CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), |
| 270 | GL_FRAGMENT_SHADER); | ||
| 270 | 271 | ||
| 271 | // Generate presentation sampler | 272 | // Generate presentation sampler |
| 272 | present_sampler.Create(); | 273 | present_sampler.Create(); |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 2bed4f3c5..9dfc508bc 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -21,8 +21,9 @@ | |||
| 21 | #include "video_core/host_shaders/fxaa_vert_spv.h" | 21 | #include "video_core/host_shaders/fxaa_vert_spv.h" |
| 22 | #include "video_core/host_shaders/present_bicubic_frag_spv.h" | 22 | #include "video_core/host_shaders/present_bicubic_frag_spv.h" |
| 23 | #include "video_core/host_shaders/present_gaussian_frag_spv.h" | 23 | #include "video_core/host_shaders/present_gaussian_frag_spv.h" |
| 24 | #include "video_core/host_shaders/present_scaleforce_frag_spv.h" | ||
| 25 | #include "video_core/host_shaders/vulkan_present_frag_spv.h" | 24 | #include "video_core/host_shaders/vulkan_present_frag_spv.h" |
| 25 | #include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" | ||
| 26 | #include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" | ||
| 26 | #include "video_core/host_shaders/vulkan_present_vert_spv.h" | 27 | #include "video_core/host_shaders/vulkan_present_vert_spv.h" |
| 27 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 28 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 28 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 29 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| @@ -328,7 +329,7 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 328 | blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; | 329 | blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; |
| 329 | blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | 330 | blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; |
| 330 | 331 | ||
| 331 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT , | 332 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| 332 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier); | 333 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier); |
| 333 | } | 334 | } |
| 334 | }); | 335 | }); |
| @@ -344,8 +345,12 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 344 | crop_rect.bottom = framebuffer.height; | 345 | crop_rect.bottom = framebuffer.height; |
| 345 | } | 346 | } |
| 346 | crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); | 347 | crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); |
| 348 | VkExtent2D fsr_input_size{ | ||
| 349 | .width = Settings::values.resolution_info.ScaleUp(framebuffer.width), | ||
| 350 | .height = Settings::values.resolution_info.ScaleUp(framebuffer.height), | ||
| 351 | }; | ||
| 347 | VkImageView fsr_image_view = | 352 | VkImageView fsr_image_view = |
| 348 | fsr->Draw(scheduler, image_index, source_image_view, crop_rect); | 353 | fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); |
| 349 | UpdateDescriptorSet(image_index, fsr_image_view, true); | 354 | UpdateDescriptorSet(image_index, fsr_image_view, true); |
| 350 | } else { | 355 | } else { |
| 351 | const bool is_nn = | 356 | const bool is_nn = |
| @@ -500,7 +505,11 @@ void VKBlitScreen::CreateShaders() { | |||
| 500 | bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); | 505 | bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); |
| 501 | bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); | 506 | bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); |
| 502 | gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); | 507 | gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); |
| 503 | scaleforce_fragment_shader = BuildShader(device, PRESENT_SCALEFORCE_FRAG_SPV); | 508 | if (device.IsFloat16Supported()) { |
| 509 | scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV); | ||
| 510 | } else { | ||
| 511 | scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV); | ||
| 512 | } | ||
| 504 | } | 513 | } |
| 505 | 514 | ||
| 506 | void VKBlitScreen::CreateSemaphores() { | 515 | void VKBlitScreen::CreateSemaphores() { |
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 1f60974be..9288aa7c2 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp | |||
| @@ -4,13 +4,19 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/common_types.h" | 5 | #include "common/common_types.h" |
| 6 | #include "common/div_ceil.h" | 6 | #include "common/div_ceil.h" |
| 7 | #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_comp_spv.h" | 7 | #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" |
| 8 | #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_comp_spv.h" | 8 | #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" |
| 9 | #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" | ||
| 10 | #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_fsr.h" | 11 | #include "video_core/renderer_vulkan/vk_fsr.h" |
| 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 11 | #include "video_core/renderer_vulkan/vk_shader_util.h" | 13 | #include "video_core/renderer_vulkan/vk_shader_util.h" |
| 12 | #include "video_core/vulkan_common/vulkan_device.h" | 14 | #include "video_core/vulkan_common/vulkan_device.h" |
| 13 | 15 | ||
| 16 | #define A_CPU | ||
| 17 | #include <ffx_a.h> | ||
| 18 | #include <ffx_fsr1.h> | ||
| 19 | |||
| 14 | namespace Vulkan { | 20 | namespace Vulkan { |
| 15 | 21 | ||
| 16 | FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, | 22 | FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, |
| @@ -29,11 +35,11 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image | |||
| 29 | } | 35 | } |
| 30 | 36 | ||
| 31 | VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, | 37 | VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, |
| 32 | const Common::Rectangle<int>& crop_rect) { | 38 | VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) { |
| 33 | 39 | ||
| 34 | UpdateDescriptorSet(image_index, image_view); | 40 | UpdateDescriptorSet(image_index, image_view); |
| 35 | 41 | ||
| 36 | scheduler.Record([this, image_index, crop_rect](vk::CommandBuffer cmdbuf) { | 42 | scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) { |
| 37 | const VkImageMemoryBarrier base_barrier{ | 43 | const VkImageMemoryBarrier base_barrier{ |
| 38 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 44 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 39 | .pNext = nullptr, | 45 | .pNext = nullptr, |
| @@ -54,13 +60,18 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im | |||
| 54 | }, | 60 | }, |
| 55 | }; | 61 | }; |
| 56 | 62 | ||
| 57 | // TODO: Support clear color | ||
| 58 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); | 63 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); |
| 59 | cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, | 64 | |
| 60 | VkExtent2D{ | 65 | std::array<AU1, 4 * 4> push_constants; |
| 61 | .width = static_cast<u32>(crop_rect.GetWidth()), | 66 | FsrEasuConOffset( |
| 62 | .height = static_cast<u32>(crop_rect.GetHeight()), | 67 | push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, |
| 63 | }); | 68 | push_constants.data() + 12, |
| 69 | |||
| 70 | static_cast<AF1>(crop_rect.GetWidth()), static_cast<AF1>(crop_rect.GetHeight()), | ||
| 71 | static_cast<AF1>(input_image_extent.width), static_cast<AF1>(input_image_extent.height), | ||
| 72 | static_cast<AF1>(output_size.width), static_cast<AF1>(output_size.height), | ||
| 73 | static_cast<AF1>(crop_rect.left), static_cast<AF1>(crop_rect.top)); | ||
| 74 | cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); | ||
| 64 | 75 | ||
| 65 | { | 76 | { |
| 66 | VkImageMemoryBarrier fsr_write_barrier = base_barrier; | 77 | VkImageMemoryBarrier fsr_write_barrier = base_barrier; |
| @@ -77,7 +88,9 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im | |||
| 77 | Common::DivCeil(output_size.height, 16u), 1); | 88 | Common::DivCeil(output_size.height, 16u), 1); |
| 78 | 89 | ||
| 79 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline); | 90 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline); |
| 80 | cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, output_size); | 91 | |
| 92 | FsrRcasCon(push_constants.data(), 0.25f); | ||
| 93 | cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); | ||
| 81 | 94 | ||
| 82 | { | 95 | { |
| 83 | std::array<VkImageMemoryBarrier, 2> barriers; | 96 | std::array<VkImageMemoryBarrier, 2> barriers; |
| @@ -247,7 +260,7 @@ void FSR::CreatePipelineLayout() { | |||
| 247 | VkPushConstantRange push_const{ | 260 | VkPushConstantRange push_const{ |
| 248 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 261 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 249 | .offset = 0, | 262 | .offset = 0, |
| 250 | .size = sizeof(std::array<u32, 2>), | 263 | .size = sizeof(std::array<u32, 4 * 4>), |
| 251 | }; | 264 | }; |
| 252 | VkPipelineLayoutCreateInfo ci{ | 265 | VkPipelineLayoutCreateInfo ci{ |
| 253 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | 266 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, |
| @@ -344,8 +357,13 @@ void FSR::CreateSampler() { | |||
| 344 | } | 357 | } |
| 345 | 358 | ||
| 346 | void FSR::CreateShaders() { | 359 | void FSR::CreateShaders() { |
| 347 | easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_COMP_SPV); | 360 | if (device.IsFloat16Supported()) { |
| 348 | rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_COMP_SPV); | 361 | easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV); |
| 362 | rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV); | ||
| 363 | } else { | ||
| 364 | easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV); | ||
| 365 | rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV); | ||
| 366 | } | ||
| 349 | } | 367 | } |
| 350 | 368 | ||
| 351 | void FSR::CreatePipeline() { | 369 | void FSR::CreatePipeline() { |
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h index 8391e2e58..6bbec3d36 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.h +++ b/src/video_core/renderer_vulkan/vk_fsr.h | |||
| @@ -18,7 +18,7 @@ public: | |||
| 18 | explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, | 18 | explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, |
| 19 | VkExtent2D output_size); | 19 | VkExtent2D output_size); |
| 20 | VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, | 20 | VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, |
| 21 | const Common::Rectangle<int>& crop_rect); | 21 | VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect); |
| 22 | 22 | ||
| 23 | private: | 23 | private: |
| 24 | void CreateDescriptorPool(); | 24 | void CreateDescriptorPool(); |