diff options
| author | 2021-10-20 13:35:59 -0500 | |
|---|---|---|
| committer | 2021-11-16 22:11:32 +0100 | |
| commit | 916b882ea8870e695d50e8ca8c8e4c35fb1895d5 (patch) | |
| tree | ba367700e34823783c07d0de5365d5f6431ae860 | |
| parent | VideoCore: Add more rescaling option. (diff) | |
| download | yuzu-916b882ea8870e695d50e8ca8c8e4c35fb1895d5.tar.gz yuzu-916b882ea8870e695d50e8ca8c8e4c35fb1895d5.tar.xz yuzu-916b882ea8870e695d50e8ca8c8e4c35fb1895d5.zip | |
Update scaleforce to use FP16
| -rw-r--r-- | src/video_core/host_shaders/present_scaleforce.frag | 143 |
1 files changed, 55 insertions, 88 deletions
diff --git a/src/video_core/host_shaders/present_scaleforce.frag b/src/video_core/host_shaders/present_scaleforce.frag index 1829a9be8..ebc0d9b90 100644 --- a/src/video_core/host_shaders/present_scaleforce.frag +++ b/src/video_core/host_shaders/present_scaleforce.frag | |||
| @@ -24,6 +24,9 @@ | |||
| 24 | 24 | ||
| 25 | #version 460 | 25 | #version 460 |
| 26 | 26 | ||
| 27 | #extension GL_AMD_gpu_shader_half_float : enable | ||
| 28 | #extension GL_NV_gpu_shader5 : enable | ||
| 29 | |||
| 27 | #ifdef VULKAN | 30 | #ifdef VULKAN |
| 28 | 31 | ||
| 29 | #define BINDING_COLOR_TEXTURE 1 | 32 | #define BINDING_COLOR_TEXTURE 1 |
| @@ -40,106 +43,70 @@ layout (location = 0) out vec4 frag_color; | |||
| 40 | 43 | ||
| 41 | layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; | 44 | layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; |
| 42 | 45 | ||
| 43 | vec2 tex_size; | 46 | const bool ignore_alpha = true; |
| 44 | vec2 inv_tex_size; | ||
| 45 | |||
| 46 | vec4 cubic(float v) { | ||
| 47 | vec3 n = vec3(1.0, 2.0, 3.0) - v; | ||
| 48 | vec3 s = n * n * n; | ||
| 49 | float x = s.x; | ||
| 50 | float y = s.y - 4.0 * s.x; | ||
| 51 | float z = s.z - 4.0 * s.y + 6.0 * s.x; | ||
| 52 | float w = 6.0 - x - y - z; | ||
| 53 | return vec4(x, y, z, w) / 6.0; | ||
| 54 | } | ||
| 55 | |||
| 56 | // Bicubic interpolation | ||
| 57 | vec4 textureBicubic(vec2 tex_coords) { | ||
| 58 | tex_coords = tex_coords * tex_size - 0.5; | ||
| 59 | |||
| 60 | vec2 fxy = modf(tex_coords, tex_coords); | ||
| 61 | |||
| 62 | vec4 xcubic = cubic(fxy.x); | ||
| 63 | vec4 ycubic = cubic(fxy.y); | ||
| 64 | |||
| 65 | vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; | ||
| 66 | 47 | ||
| 67 | vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); | 48 | float16_t ColorDist1(f16vec4 a, f16vec4 b) { |
| 68 | vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; | ||
| 69 | |||
| 70 | offset *= inv_tex_size.xxyy; | ||
| 71 | |||
| 72 | vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); | ||
| 73 | vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); | ||
| 74 | vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); | ||
| 75 | vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); | ||
| 76 | |||
| 77 | float sx = s.x / (s.x + s.y); | ||
| 78 | float sy = s.z / (s.z + s.w); | ||
| 79 | |||
| 80 | return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); | ||
| 81 | } | ||
| 82 | |||
| 83 | mat4x3 center_matrix; | ||
| 84 | vec4 center_alpha; | ||
| 85 | |||
| 86 | // Finds the distance between four colors and cc in YCbCr space | ||
| 87 | vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { | ||
| 88 | // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion | 49 | // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion |
| 89 | const vec3 K = vec3(0.2627, 0.6780, 0.0593); | 50 | const f16vec3 K = f16vec3(0.2627, 0.6780, 0.0593); |
| 90 | const float LUMINANCE_WEIGHT = .6; | 51 | const float16_t scaleB = float16_t(0.5) / (float16_t(1.0) - K.b); |
| 91 | const mat3 YCBCR_MATRIX = | 52 | const float16_t scaleR = float16_t(0.5) / (float16_t(1.0) - K.r); |
| 92 | mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, | 53 | f16vec4 diff = a - b; |
| 93 | -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); | 54 | float16_t Y = dot(diff.rgb, K); |
| 94 | 55 | float16_t Cb = scaleB * (diff.b - Y); | |
| 95 | mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; | 56 | float16_t Cr = scaleR * (diff.r - Y); |
| 96 | mat4x3 YCbCr = YCBCR_MATRIX * colors; | 57 | f16vec3 YCbCr = f16vec3(Y, Cb, Cr); |
| 97 | vec4 color_dist = vec3(1.0) * YCbCr; | 58 | float16_t d = length(YCbCr); |
| 98 | color_dist *= color_dist; | 59 | if (ignore_alpha) { |
| 99 | vec4 alpha = vec4(A.a, B.a, C.a, D.a); | 60 | return d; |
| 100 | 61 | } | |
| 101 | return sqrt((color_dist + abs(center_alpha - alpha)) * alpha * center_alpha); | 62 | return sqrt(a.a * b.a * d * d + diff.a * diff.a); |
| 102 | } | 63 | } |
| 103 | 64 | ||
| 104 | void main() { | 65 | f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) { |
| 105 | vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); | 66 | return f16vec4( |
| 106 | vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); | 67 | ColorDist1(ref, A), |
| 107 | vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); | 68 | ColorDist1(ref, B), |
| 108 | vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); | 69 | ColorDist1(ref, C), |
| 109 | vec4 cc = textureLod(input_texture, tex_coord, 0.0); | 70 | ColorDist1(ref, D) |
| 110 | vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); | 71 | ); |
| 111 | vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); | 72 | } |
| 112 | vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); | ||
| 113 | vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); | ||
| 114 | |||
| 115 | 73 | ||
| 116 | tex_size = vec2(textureSize(input_texture, 0)); | 74 | vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { |
| 117 | inv_tex_size = 1.0 / tex_size; | 75 | f16vec4 bl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); |
| 118 | center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); | 76 | f16vec4 bc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, -1))); |
| 119 | center_alpha = cc.aaaa; | 77 | f16vec4 br = f16vec4(textureOffset(tex, tex_coord, ivec2(1, -1))); |
| 78 | f16vec4 cl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); | ||
| 79 | f16vec4 cc = f16vec4(texture(tex, tex_coord)); | ||
| 80 | f16vec4 cr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 0))); | ||
| 81 | f16vec4 tl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); | ||
| 82 | f16vec4 tc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, 1))); | ||
| 83 | f16vec4 tr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 1))); | ||
| 120 | 84 | ||
| 121 | vec4 offset_tl = ColorDist(tl, tc, tr, cr); | 85 | f16vec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); |
| 122 | vec4 offset_br = ColorDist(br, bc, bl, cl); | 86 | f16vec4 offset_br = ColorDist(cc, br, bc, bl, cl); |
| 123 | 87 | ||
| 124 | // Calculate how different cc is from the texels around it | 88 | // Calculate how different cc is from the texels around it |
| 125 | float total_dist = dot(offset_tl + offset_br, vec4(1.0)); | 89 | const float16_t plus_weight = float16_t(1.5); |
| 126 | 90 | const float16_t cross_weight = float16_t(1.5); | |
| 127 | // Add together all the distances with direction taken into account | 91 | float16_t total_dist = dot(offset_tl + offset_br, f16vec4(cross_weight, plus_weight, cross_weight, plus_weight)); |
| 128 | vec4 tmp = offset_tl - offset_br; | ||
| 129 | vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); | ||
| 130 | 92 | ||
| 131 | if (total_dist == 0.0) { | 93 | if (total_dist == float16_t(0.0)) { |
| 132 | // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters | 94 | return cc; |
| 133 | // and it doesn't really matter which filter is used when the colors aren't changing. | ||
| 134 | frag_color = vec4(cc.rgb, 1.0f); | ||
| 135 | } else { | 95 | } else { |
| 96 | // Add together all the distances with direction taken into account | ||
| 97 | f16vec4 tmp = offset_tl - offset_br; | ||
| 98 | f16vec2 total_offset = tmp.wy * plus_weight + (tmp.zz + f16vec2(-tmp.x, tmp.x)) * cross_weight; | ||
| 99 | |||
| 136 | // When the image has thin points, they tend to split apart. | 100 | // When the image has thin points, they tend to split apart. |
| 137 | // This is because the texels all around are different | 101 | // This is because the texels all around are different and total_offset reaches into clear areas. |
| 138 | // and total_offset reaches into clear areas. | ||
| 139 | // This works pretty well to keep the offset in bounds for these cases. | 102 | // This works pretty well to keep the offset in bounds for these cases. |
| 140 | float clamp_val = length(total_offset) / total_dist; | 103 | float16_t clamp_val = length(total_offset) / total_dist; |
| 141 | vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; | 104 | f16vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) / f16vec2(textureSize(tex, 0)); |
| 142 | 105 | ||
| 143 | frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); | 106 | return texture(tex, tex_coord - final_offset); |
| 144 | } | 107 | } |
| 145 | } | 108 | } |
| 109 | |||
| 110 | void main() { | ||
| 111 | frag_color = Scaleforce(input_texture, tex_coord); | ||
| 112 | } \ No newline at end of file | ||