diff options
120 files changed, 9051 insertions, 646 deletions
diff --git a/externals/FidelityFX-FSR/ffx-fsr/ffx_a.h b/externals/FidelityFX-FSR/ffx-fsr/ffx_a.h new file mode 100644 index 000000000..d04bff55c --- /dev/null +++ b/externals/FidelityFX-FSR/ffx-fsr/ffx_a.h | |||
| @@ -0,0 +1,2656 @@ | |||
| 1 | //============================================================================================================================== | ||
| 2 | // | ||
| 3 | // [A] SHADER PORTABILITY 1.20210629 | ||
| 4 | // | ||
| 5 | //============================================================================================================================== | ||
| 6 | // FidelityFX Super Resolution Sample | ||
| 7 | // | ||
| 8 | // Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. | ||
| 9 | // Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| 10 | // of this software and associated documentation files(the "Software"), to deal | ||
| 11 | // in the Software without restriction, including without limitation the rights | ||
| 12 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell | ||
| 13 | // copies of the Software, and to permit persons to whom the Software is | ||
| 14 | // furnished to do so, subject to the following conditions : | ||
| 15 | // The above copyright notice and this permission notice shall be included in | ||
| 16 | // all copies or substantial portions of the Software. | ||
| 17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE | ||
| 20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| 23 | // THE SOFTWARE. | ||
| 24 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 25 | // MIT LICENSE | ||
| 26 | // =========== | ||
| 27 | // Copyright (c) 2014 Michal Drobot (for concepts used in "FLOAT APPROXIMATIONS"). | ||
| 28 | // ----------- | ||
| 29 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation | ||
| 30 | // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, | ||
| 31 | // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the | ||
| 32 | // Software is furnished to do so, subject to the following conditions: | ||
| 33 | // ----------- | ||
| 34 | // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the | ||
| 35 | // Software. | ||
| 36 | // ----------- | ||
| 37 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE | ||
| 38 | // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | ||
| 39 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 40 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| 41 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 42 | // ABOUT | ||
| 43 | // ===== | ||
| 44 | // Common central point for high-level shading language and C portability for various shader headers. | ||
| 45 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 46 | // DEFINES | ||
| 47 | // ======= | ||
| 48 | // A_CPU ..... Include the CPU related code. | ||
| 49 | // A_GPU ..... Include the GPU related code. | ||
| 50 | // A_GLSL .... Using GLSL. | ||
| 51 | // A_HLSL .... Using HLSL. | ||
| 52 | // A_HLSL_6_2 Using HLSL 6.2 with new 'uint16_t' and related types (requires '-enable-16bit-types'). | ||
| 53 | // A_NO_16_BIT_CAST Don't use instructions that are not available in SPIR-V (needed for running A_HLSL_6_2 on Vulkan) | |
| 54 | // A_GCC ..... Using a GCC compatible compiler (else assume MSVC compatible compiler by default). | ||
| 55 | // ======= | ||
| 56 | // A_BYTE .... Support 8-bit integer. | ||
| 57 | // A_HALF .... Support 16-bit integer and floating point. | ||
| 58 | // A_LONG .... Support 64-bit integer. | ||
| 59 | // A_DUBL .... Support 64-bit floating point. | ||
| 60 | // ======= | ||
| 61 | // A_WAVE .... Support wave-wide operations. | ||
| 62 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 63 | // To get #include "ffx_a.h" working in GLSL use '#extension GL_GOOGLE_include_directive:require'. | ||
| 64 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 65 | // SIMPLIFIED TYPE SYSTEM | ||
| 66 | // ====================== | ||
| 67 | // - All ints will be unsigned with exception of when signed is required. | ||
| 68 | // - Type naming simplified and shortened "A<type><#components>", | ||
| 69 | // - H = 16-bit float (half) | ||
| 70 | // - F = 32-bit float (float) | ||
| 71 | // - D = 64-bit float (double) | ||
| 72 | // - P = 1-bit integer (predicate, not using bool because 'B' is used for byte) | ||
| 73 | // - B = 8-bit integer (byte) | ||
| 74 | // - W = 16-bit integer (word) | ||
| 75 | // - U = 32-bit integer (unsigned) | ||
| 76 | // - L = 64-bit integer (long) | ||
| 77 | // - Using "AS<type><#components>" for signed when required. | ||
| 78 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 79 | // TODO | ||
| 80 | // ==== | ||
| 81 | // - Make sure 'ALerp*(a,b,m)' does 'b*m+(-a*m+a)' (2 ops). | ||
| 82 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 83 | // CHANGE LOG | ||
| 84 | // ========== | ||
| 85 | // 20200914 - Expanded wave ops and prx code. | ||
| 86 | // 20200713 - Added [ZOL] section, fixed serious bugs in sRGB and Rec.709 color conversion code, etc. | ||
| 87 | //============================================================================================================================== | ||
| 88 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 89 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 90 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 91 | //============================================================================================================================== | ||
| 92 | // COMMON | ||
| 93 | //============================================================================================================================== | ||
// Two times pi, rounded to 11 decimal places.
#define A_2PI 6.28318530718
| 95 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 96 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 97 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 98 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 99 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 100 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 101 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 102 | //============================================================================================================================== | ||
| 103 | // | ||
| 104 | // | ||
| 105 | // CPU | ||
| 106 | // | ||
| 107 | // | ||
| 108 | //============================================================================================================================== | ||
| 109 | #ifdef A_CPU | ||
| 110 | // Supporting user defined overrides. | ||
// No-alias pointer qualifier used by the vector argument macros below.
// Define A_RESTRICT before including this header to substitute a different spelling.
#if !defined(A_RESTRICT)
 #define A_RESTRICT __restrict
#endif
| 114 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// Storage-class prefix placed in front of every CPU helper function in this header.
// Define A_STATIC before including this header to substitute a different prefix.
#if !defined(A_STATIC)
 #define A_STATIC static
#endif
| 118 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 119 | // Same types across CPU and GPU. | ||
| 120 | // Predicate uses 32-bit integer (C friendly bool). | ||
// Predicate, carried in a 32-bit unsigned integer.
typedef uint32_t AP1;
// Floating point: F = 32-bit, D = 64-bit.
typedef float    AF1;
typedef double   AD1;
// Unsigned integers: B = 8-bit, W = 16-bit, U = 32-bit, L = 64-bit.
typedef uint8_t  AB1;
typedef uint16_t AW1;
typedef uint32_t AU1;
typedef uint64_t AL1;
// Signed counterparts of the integer types above ('AS' prefix).
typedef int8_t   ASB1;
typedef int16_t  ASW1;
typedef int32_t  ASU1;
typedef int64_t  ASL1;
| 132 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// Explicit value conversions to the unsigned integer and floating point scalar types.
#define AU1_(a) ((AU1)(a))
#define AL1_(a) ((AL1)(a))
#define AF1_(a) ((AF1)(a))
#define AD1_(a) ((AD1)(a))
//------------------------------------------------------------------------------------------------------------------------------
// Explicit value conversions to the signed 32-bit and 64-bit integer types.
#define ASU1_(a) ((ASU1)(a))
#define ASL1_(a) ((ASL1)(a))
| 140 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 141 | A_STATIC AU1 AU1_AF1(AF1 a){union{AF1 f;AU1 u;}bits;bits.f=a;return bits.u;} | ||
| 142 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// Integer truth values.
#define A_FALSE 0
#define A_TRUE 1
| 145 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 146 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 147 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 148 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 149 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 150 | //============================================================================================================================== | ||
| 151 | // | ||
| 152 | // CPU/GPU PORTING | ||
| 153 | // | ||
| 154 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 155 | // Get CPU and GPU to share all setup code, without duplicate code paths. | ||
| 156 | // This uses a lower-case prefix for special vector constructs. | ||
| 157 | // - In C restrict pointers are used. | ||
| 158 | // - In the shading language, in/inout/out arguments are used. | ||
| 159 | // This depends on the ability to access a vector value in both languages via array syntax (aka color[2]). | ||
| 160 | //============================================================================================================================== | ||
| 161 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 162 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 163 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 164 | //============================================================================================================================== | ||
| 165 | // VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY | ||
| 166 | //============================================================================================================================== | ||
// Vector RETURN types on the CPU: a restrict-qualified pointer to the scalar component type.
// The component count in the name (2/3/4) does not change the expansion.
// 64-bit float.
#define retAD2 AD1 *A_RESTRICT
#define retAD3 AD1 *A_RESTRICT
#define retAD4 AD1 *A_RESTRICT
// 32-bit float.
#define retAF2 AF1 *A_RESTRICT
#define retAF3 AF1 *A_RESTRICT
#define retAF4 AF1 *A_RESTRICT
// 64-bit integer.
#define retAL2 AL1 *A_RESTRICT
#define retAL3 AL1 *A_RESTRICT
#define retAL4 AL1 *A_RESTRICT
// 32-bit integer.
#define retAU2 AU1 *A_RESTRICT
#define retAU3 AU1 *A_RESTRICT
#define retAU4 AU1 *A_RESTRICT
| 179 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// Vector INPUT argument types on the CPU: a restrict-qualified pointer to the scalar component type.
// The component count in the name (2/3/4) does not change the expansion.
// 64-bit float.
#define inAD2 AD1 *A_RESTRICT
#define inAD3 AD1 *A_RESTRICT
#define inAD4 AD1 *A_RESTRICT
// 32-bit float.
#define inAF2 AF1 *A_RESTRICT
#define inAF3 AF1 *A_RESTRICT
#define inAF4 AF1 *A_RESTRICT
// 64-bit integer.
#define inAL2 AL1 *A_RESTRICT
#define inAL3 AL1 *A_RESTRICT
#define inAL4 AL1 *A_RESTRICT
// 32-bit integer.
#define inAU2 AU1 *A_RESTRICT
#define inAU3 AU1 *A_RESTRICT
#define inAU4 AU1 *A_RESTRICT
| 192 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// Vector IN/OUT argument types on the CPU: a restrict-qualified pointer to the scalar component type.
// The component count in the name (2/3/4) does not change the expansion.
// 64-bit float.
#define inoutAD2 AD1 *A_RESTRICT
#define inoutAD3 AD1 *A_RESTRICT
#define inoutAD4 AD1 *A_RESTRICT
// 32-bit float.
#define inoutAF2 AF1 *A_RESTRICT
#define inoutAF3 AF1 *A_RESTRICT
#define inoutAF4 AF1 *A_RESTRICT
// 64-bit integer.
#define inoutAL2 AL1 *A_RESTRICT
#define inoutAL3 AL1 *A_RESTRICT
#define inoutAL4 AL1 *A_RESTRICT
// 32-bit integer.
#define inoutAU2 AU1 *A_RESTRICT
#define inoutAU3 AU1 *A_RESTRICT
#define inoutAU4 AU1 *A_RESTRICT
| 205 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// Vector OUTPUT argument types on the CPU: a restrict-qualified pointer to the scalar component type.
// The component count in the name (2/3/4) does not change the expansion.
// 64-bit float.
#define outAD2 AD1 *A_RESTRICT
#define outAD3 AD1 *A_RESTRICT
#define outAD4 AD1 *A_RESTRICT
// 32-bit float.
#define outAF2 AF1 *A_RESTRICT
#define outAF3 AF1 *A_RESTRICT
#define outAF4 AF1 *A_RESTRICT
// 64-bit integer.
#define outAL2 AL1 *A_RESTRICT
#define outAL3 AL1 *A_RESTRICT
#define outAL4 AL1 *A_RESTRICT
// 32-bit integer.
#define outAU2 AU1 *A_RESTRICT
#define outAU3 AU1 *A_RESTRICT
#define outAU4 AU1 *A_RESTRICT
| 218 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// Vector VARIABLE declarations on the CPU: 'x' becomes a fixed-size array of 2, 3 or 4 scalar components.
// 64-bit float.
#define varAD2(x) AD1 x[2]
#define varAD3(x) AD1 x[3]
#define varAD4(x) AD1 x[4]
// 32-bit float.
#define varAF2(x) AF1 x[2]
#define varAF3(x) AF1 x[3]
#define varAF4(x) AF1 x[4]
// 64-bit integer.
#define varAL2(x) AL1 x[2]
#define varAL3(x) AL1 x[3]
#define varAL4(x) AL1 x[4]
// 32-bit integer.
#define varAU2(x) AU1 x[2]
#define varAU3(x) AU1 x[3]
#define varAU4(x) AU1 x[4]
| 231 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// Vector INITIALIZERS on the CPU: a brace-enclosed list of the components, in argument order.
// 64-bit float.
#define initAD2(x,y) {x,y}
#define initAD3(x,y,z) {x,y,z}
#define initAD4(x,y,z,w) {x,y,z,w}
// 32-bit float.
#define initAF2(x,y) {x,y}
#define initAF3(x,y,z) {x,y,z}
#define initAF4(x,y,z,w) {x,y,z,w}
// 64-bit integer.
#define initAL2(x,y) {x,y}
#define initAL3(x,y,z) {x,y,z}
#define initAL4(x,y,z,w) {x,y,z,w}
// 32-bit integer.
#define initAU2(x,y) {x,y}
#define initAU3(x,y,z) {x,y,z}
#define initAU4(x,y,z,w) {x,y,z,w}
| 244 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 245 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 246 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 247 | //============================================================================================================================== | ||
| 248 | // SCALAR RETURN OPS | ||
| 249 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 250 | // TODO | ||
| 251 | // ==== | ||
| 252 | // - Replace transcendentals with manual versions. | ||
| 253 | //============================================================================================================================== | ||
| 254 | #ifdef A_GCC | ||
| 255 | A_STATIC AD1 AAbsD1(AD1 a){return __builtin_fabs(a);} | ||
| 256 | A_STATIC AF1 AAbsF1(AF1 a){return __builtin_fabsf(a);} | ||
| 257 | A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(__builtin_abs(ASU1_(a)));} | ||
| 258 | A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(__builtin_llabs(ASL1_(a)));} | ||
| 259 | #else | ||
| 260 | A_STATIC AD1 AAbsD1(AD1 a){return fabs(a);} | ||
| 261 | A_STATIC AF1 AAbsF1(AF1 a){return fabsf(a);} | ||
| 262 | A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(abs(ASU1_(a)));} | ||
| 263 | A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(labs((long)ASL1_(a)));} | ||
| 264 | #endif | ||
| 265 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 266 | #ifdef A_GCC | ||
| 267 | A_STATIC AD1 ACosD1(AD1 a){return __builtin_cos(a);} | ||
| 268 | A_STATIC AF1 ACosF1(AF1 a){return __builtin_cosf(a);} | ||
| 269 | #else | ||
| 270 | A_STATIC AD1 ACosD1(AD1 a){return cos(a);} | ||
| 271 | A_STATIC AF1 ACosF1(AF1 a){return cosf(a);} | ||
| 272 | #endif | ||
| 273 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 274 | A_STATIC AD1 ADotD2(inAD2 a,inAD2 b){return a[0]*b[0]+a[1]*b[1];} | ||
| 275 | A_STATIC AD1 ADotD3(inAD3 a,inAD3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];} | ||
| 276 | A_STATIC AD1 ADotD4(inAD4 a,inAD4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];} | ||
| 277 | A_STATIC AF1 ADotF2(inAF2 a,inAF2 b){return a[0]*b[0]+a[1]*b[1];} | ||
| 278 | A_STATIC AF1 ADotF3(inAF3 a,inAF3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];} | ||
| 279 | A_STATIC AF1 ADotF4(inAF4 a,inAF4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];} | ||
| 280 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 281 | #ifdef A_GCC | ||
| 282 | A_STATIC AD1 AExp2D1(AD1 a){return __builtin_exp2(a);} | ||
| 283 | A_STATIC AF1 AExp2F1(AF1 a){return __builtin_exp2f(a);} | ||
| 284 | #else | ||
| 285 | A_STATIC AD1 AExp2D1(AD1 a){return exp2(a);} | ||
| 286 | A_STATIC AF1 AExp2F1(AF1 a){return exp2f(a);} | ||
| 287 | #endif | ||
| 288 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 289 | #ifdef A_GCC | ||
| 290 | A_STATIC AD1 AFloorD1(AD1 a){return __builtin_floor(a);} | ||
| 291 | A_STATIC AF1 AFloorF1(AF1 a){return __builtin_floorf(a);} | ||
| 292 | #else | ||
| 293 | A_STATIC AD1 AFloorD1(AD1 a){return floor(a);} | ||
| 294 | A_STATIC AF1 AFloorF1(AF1 a){return floorf(a);} | ||
| 295 | #endif | ||
| 296 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 297 | A_STATIC AD1 ALerpD1(AD1 a,AD1 b,AD1 c){return b*c+(-a*c+a);} | ||
| 298 | A_STATIC AF1 ALerpF1(AF1 a,AF1 b,AF1 c){return b*c+(-a*c+a);} | ||
| 299 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 300 | #ifdef A_GCC | ||
| 301 | A_STATIC AD1 ALog2D1(AD1 a){return __builtin_log2(a);} | ||
| 302 | A_STATIC AF1 ALog2F1(AF1 a){return __builtin_log2f(a);} | ||
| 303 | #else | ||
| 304 | A_STATIC AD1 ALog2D1(AD1 a){return log2(a);} | ||
| 305 | A_STATIC AF1 ALog2F1(AF1 a){return log2f(a);} | ||
| 306 | #endif | ||
| 307 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 308 | A_STATIC AD1 AMaxD1(AD1 a,AD1 b){return a>b?a:b;} | ||
| 309 | A_STATIC AF1 AMaxF1(AF1 a,AF1 b){return a>b?a:b;} | ||
| 310 | A_STATIC AL1 AMaxL1(AL1 a,AL1 b){return a>b?a:b;} | ||
| 311 | A_STATIC AU1 AMaxU1(AU1 a,AU1 b){return a>b?a:b;} | ||
| 312 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 313 | // These follow the convention that A integer types carry no signedness until they are operated on. | |
| 314 | A_STATIC AL1 AMaxSL1(AL1 a,AL1 b){return (ASL1_(a)>ASL1_(b))?a:b;} | ||
| 315 | A_STATIC AU1 AMaxSU1(AU1 a,AU1 b){return (ASU1_(a)>ASU1_(b))?a:b;} | ||
| 316 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 317 | A_STATIC AD1 AMinD1(AD1 a,AD1 b){return a<b?a:b;} | ||
| 318 | A_STATIC AF1 AMinF1(AF1 a,AF1 b){return a<b?a:b;} | ||
| 319 | A_STATIC AL1 AMinL1(AL1 a,AL1 b){return a<b?a:b;} | ||
| 320 | A_STATIC AU1 AMinU1(AU1 a,AU1 b){return a<b?a:b;} | ||
| 321 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 322 | A_STATIC AL1 AMinSL1(AL1 a,AL1 b){return (ASL1_(a)<ASL1_(b))?a:b;} | ||
| 323 | A_STATIC AU1 AMinSU1(AU1 a,AU1 b){return (ASU1_(a)<ASU1_(b))?a:b;} | ||
| 324 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 325 | A_STATIC AD1 ARcpD1(AD1 a){return 1.0/a;} | ||
| 326 | A_STATIC AF1 ARcpF1(AF1 a){return 1.0f/a;} | ||
| 327 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 328 | A_STATIC AL1 AShrSL1(AL1 a,AL1 b){return AL1_(ASL1_(a)>>ASL1_(b));} | ||
| 329 | A_STATIC AU1 AShrSU1(AU1 a,AU1 b){return AU1_(ASU1_(a)>>ASU1_(b));} | ||
| 330 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 331 | #ifdef A_GCC | ||
| 332 | A_STATIC AD1 ASinD1(AD1 a){return __builtin_sin(a);} | ||
| 333 | A_STATIC AF1 ASinF1(AF1 a){return __builtin_sinf(a);} | ||
| 334 | #else | ||
| 335 | A_STATIC AD1 ASinD1(AD1 a){return sin(a);} | ||
| 336 | A_STATIC AF1 ASinF1(AF1 a){return sinf(a);} | ||
| 337 | #endif | ||
| 338 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 339 | #ifdef A_GCC | ||
| 340 | A_STATIC AD1 ASqrtD1(AD1 a){return __builtin_sqrt(a);} | ||
| 341 | A_STATIC AF1 ASqrtF1(AF1 a){return __builtin_sqrtf(a);} | ||
| 342 | #else | ||
| 343 | A_STATIC AD1 ASqrtD1(AD1 a){return sqrt(a);} | ||
| 344 | A_STATIC AF1 ASqrtF1(AF1 a){return sqrtf(a);} | ||
| 345 | #endif | ||
| 346 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 347 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 348 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 349 | //============================================================================================================================== | ||
| 350 | // SCALAR RETURN OPS - DEPENDENT | ||
| 351 | //============================================================================================================================== | ||
| 352 | A_STATIC AD1 AClampD1(AD1 x,AD1 n,AD1 m){return AMaxD1(n,AMinD1(x,m));} | ||
| 353 | A_STATIC AF1 AClampF1(AF1 x,AF1 n,AF1 m){return AMaxF1(n,AMinF1(x,m));} | ||
| 354 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 355 | A_STATIC AD1 AFractD1(AD1 a){return a-AFloorD1(a);} | ||
| 356 | A_STATIC AF1 AFractF1(AF1 a){return a-AFloorF1(a);} | ||
| 357 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 358 | A_STATIC AD1 APowD1(AD1 a,AD1 b){return AExp2D1(b*ALog2D1(a));} | ||
| 359 | A_STATIC AF1 APowF1(AF1 a,AF1 b){return AExp2F1(b*ALog2F1(a));} | ||
| 360 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 361 | A_STATIC AD1 ARsqD1(AD1 a){return ARcpD1(ASqrtD1(a));} | ||
| 362 | A_STATIC AF1 ARsqF1(AF1 a){return ARcpF1(ASqrtF1(a));} | ||
| 363 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 364 | A_STATIC AD1 ASatD1(AD1 a){return AMinD1(1.0,AMaxD1(0.0,a));} | ||
| 365 | A_STATIC AF1 ASatF1(AF1 a){return AMinF1(1.0f,AMaxF1(0.0f,a));} | ||
| 366 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 367 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 368 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 369 | //============================================================================================================================== | ||
| 370 | // VECTOR OPS | ||
| 371 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 372 | // These are added as needed for production or prototyping, so not necessarily a complete set. | ||
| 373 | // They follow a convention of taking in a destination and also returning the destination value to increase utility. | ||
| 374 | //============================================================================================================================== | ||
| 375 | A_STATIC retAD2 opAAbsD2(outAD2 d,inAD2 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);return d;} | ||
| 376 | A_STATIC retAD3 opAAbsD3(outAD3 d,inAD3 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);return d;} | ||
| 377 | A_STATIC retAD4 opAAbsD4(outAD4 d,inAD4 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);d[3]=AAbsD1(a[3]);return d;} | ||
| 378 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 379 | A_STATIC retAF2 opAAbsF2(outAF2 d,inAF2 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);return d;} | ||
| 380 | A_STATIC retAF3 opAAbsF3(outAF3 d,inAF3 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);return d;} | ||
| 381 | A_STATIC retAF4 opAAbsF4(outAF4 d,inAF4 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);d[3]=AAbsF1(a[3]);return d;} | ||
| 382 | //============================================================================================================================== | ||
| 383 | A_STATIC retAD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;} | ||
| 384 | A_STATIC retAD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return d;} | ||
| 385 | A_STATIC retAD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;} | ||
| 386 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 387 | A_STATIC retAF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;} | ||
| 388 | A_STATIC retAF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return d;} | ||
| 389 | A_STATIC retAF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;} | ||
| 390 | //============================================================================================================================== | ||
| 391 | A_STATIC retAD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;return d;} | ||
| 392 | A_STATIC retAD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;return d;} | ||
| 393 | A_STATIC retAD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;d[3]=a[3]+b;return d;} | ||
| 394 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 395 | A_STATIC retAF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;return d;} | ||
| 396 | A_STATIC retAF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;return d;} | ||
| 397 | A_STATIC retAF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;d[3]=a[3]+b;return d;} | ||
| 398 | //============================================================================================================================== | ||
| 399 | A_STATIC retAD2 opACpyD2(outAD2 d,inAD2 a){d[0]=a[0];d[1]=a[1];return d;} | ||
| 400 | A_STATIC retAD3 opACpyD3(outAD3 d,inAD3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;} | ||
| 401 | A_STATIC retAD4 opACpyD4(outAD4 d,inAD4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;} | ||
| 402 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 403 | A_STATIC retAF2 opACpyF2(outAF2 d,inAF2 a){d[0]=a[0];d[1]=a[1];return d;} | ||
| 404 | A_STATIC retAF3 opACpyF3(outAF3 d,inAF3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;} | ||
| 405 | A_STATIC retAF4 opACpyF4(outAF4 d,inAF4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;} | ||
| 406 | //============================================================================================================================== | ||
| 407 | A_STATIC retAD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);return d;} | ||
| 408 | A_STATIC retAD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);return d;} | ||
| 409 | A_STATIC retAD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);d[3]=ALerpD1(a[3],b[3],c[3]);return d;} | ||
| 410 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// opALerpF2/F3/F4: d[i]=ALerpF1(a[i],b[i],c[i]) per component (c supplies one factor per component); returns d.
| 411 | A_STATIC retAF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){for(int i=0;i<2;i++)d[i]=ALerpF1(a[i],b[i],c[i]);return d;} | ||
| 412 | A_STATIC retAF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){for(int i=0;i<3;i++)d[i]=ALerpF1(a[i],b[i],c[i]);return d;} | ||
| 413 | A_STATIC retAF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){for(int i=0;i<4;i++)d[i]=ALerpF1(a[i],b[i],c[i]);return d;} | ||
| 414 | //============================================================================================================================== | ||
// opALerpOneD2/D3/D4: d[i]=ALerpD1(a[i],b[i],c) with the same scalar factor c for every component; returns d.
| 415 | A_STATIC retAD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){for(int i=0;i<2;i++)d[i]=ALerpD1(a[i],b[i],c);return d;} | ||
| 416 | A_STATIC retAD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){for(int i=0;i<3;i++)d[i]=ALerpD1(a[i],b[i],c);return d;} | ||
| 417 | A_STATIC retAD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){for(int i=0;i<4;i++)d[i]=ALerpD1(a[i],b[i],c);return d;} | ||
| 418 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// opALerpOneF2/F3/F4: d[i]=ALerpF1(a[i],b[i],c) with the same scalar factor c for every component; returns d.
| 419 | A_STATIC retAF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){for(int i=0;i<2;i++)d[i]=ALerpF1(a[i],b[i],c);return d;} | ||
| 420 | A_STATIC retAF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){for(int i=0;i<3;i++)d[i]=ALerpF1(a[i],b[i],c);return d;} | ||
| 421 | A_STATIC retAF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){for(int i=0;i<4;i++)d[i]=ALerpF1(a[i],b[i],c);return d;} | ||
| 422 | //============================================================================================================================== | ||
// opAMaxD2/D3/D4: d[i]=AMaxD1(a[i],b[i]) per component; returns d.
| 423 | A_STATIC retAD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){for(int i=0;i<2;i++)d[i]=AMaxD1(a[i],b[i]);return d;} | ||
| 424 | A_STATIC retAD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){for(int i=0;i<3;i++)d[i]=AMaxD1(a[i],b[i]);return d;} | ||
| 425 | A_STATIC retAD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){for(int i=0;i<4;i++)d[i]=AMaxD1(a[i],b[i]);return d;} | ||
| 426 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// opAMaxF2/F3/F4: d[i]=AMaxF1(a[i],b[i]) per component; returns d.
| 427 | A_STATIC retAF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){for(int i=0;i<2;i++)d[i]=AMaxF1(a[i],b[i]);return d;} | ||
| 428 | A_STATIC retAF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){for(int i=0;i<3;i++)d[i]=AMaxF1(a[i],b[i]);return d;} | ||
| 429 | A_STATIC retAF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){for(int i=0;i<4;i++)d[i]=AMaxF1(a[i],b[i]);return d;} | ||
| 430 | //============================================================================================================================== | ||
// opAMinD2/D3/D4: d[i]=AMinD1(a[i],b[i]) per component; returns d.
| 431 | A_STATIC retAD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){for(int i=0;i<2;i++)d[i]=AMinD1(a[i],b[i]);return d;} | ||
| 432 | A_STATIC retAD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){for(int i=0;i<3;i++)d[i]=AMinD1(a[i],b[i]);return d;} | ||
| 433 | A_STATIC retAD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){for(int i=0;i<4;i++)d[i]=AMinD1(a[i],b[i]);return d;} | ||
| 434 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// opAMinF2/F3/F4: d[i]=AMinF1(a[i],b[i]) per component; returns d.
| 435 | A_STATIC retAF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){for(int i=0;i<2;i++)d[i]=AMinF1(a[i],b[i]);return d;} | ||
| 436 | A_STATIC retAF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){for(int i=0;i<3;i++)d[i]=AMinF1(a[i],b[i]);return d;} | ||
| 437 | A_STATIC retAF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){for(int i=0;i<4;i++)d[i]=AMinF1(a[i],b[i]);return d;} | ||
| 438 | //============================================================================================================================== | ||
// opAMulD2/D3/D4: component-wise product d[i]=a[i]*b[i]; returns d.
| 439 | A_STATIC retAD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){for(int i=0;i<2;i++)d[i]=a[i]*b[i];return d;} | ||
| 440 | A_STATIC retAD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){for(int i=0;i<3;i++)d[i]=a[i]*b[i];return d;} | ||
| 441 | A_STATIC retAD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){for(int i=0;i<4;i++)d[i]=a[i]*b[i];return d;} | ||
| 442 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// opAMulF2/F3/F4: component-wise product d[i]=a[i]*b[i]; returns d.
| 443 | A_STATIC retAF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){for(int i=0;i<2;i++)d[i]=a[i]*b[i];return d;} | ||
| 444 | A_STATIC retAF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){for(int i=0;i<3;i++)d[i]=a[i]*b[i];return d;} | ||
| 445 | A_STATIC retAF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){for(int i=0;i<4;i++)d[i]=a[i]*b[i];return d;} | ||
| 446 | //============================================================================================================================== | ||
// opAMulOneD2/D3/D4: scales every component of a by the scalar b into d; returns d.
| 447 | A_STATIC retAD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){for(int i=0;i<2;i++)d[i]=a[i]*b;return d;} | ||
| 448 | A_STATIC retAD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){for(int i=0;i<3;i++)d[i]=a[i]*b;return d;} | ||
| 449 | A_STATIC retAD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){for(int i=0;i<4;i++)d[i]=a[i]*b;return d;} | ||
| 450 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// opAMulOneF2/F3/F4: scales every component of a by the scalar b into d; returns d.
| 451 | A_STATIC retAF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){for(int i=0;i<2;i++)d[i]=a[i]*b;return d;} | ||
| 452 | A_STATIC retAF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){for(int i=0;i<3;i++)d[i]=a[i]*b;return d;} | ||
| 453 | A_STATIC retAF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){for(int i=0;i<4;i++)d[i]=a[i]*b;return d;} | ||
| 454 | //============================================================================================================================== | ||
// opANegD2/D3/D4: d[i]=-a[i] per component; returns d.
| 455 | A_STATIC retAD2 opANegD2(outAD2 d,inAD2 a){for(int i=0;i<2;i++)d[i]=-a[i];return d;} | ||
| 456 | A_STATIC retAD3 opANegD3(outAD3 d,inAD3 a){for(int i=0;i<3;i++)d[i]=-a[i];return d;} | ||
| 457 | A_STATIC retAD4 opANegD4(outAD4 d,inAD4 a){for(int i=0;i<4;i++)d[i]=-a[i];return d;} | ||
| 458 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// opANegF2/F3/F4: d[i]=-a[i] per component; returns d.
| 459 | A_STATIC retAF2 opANegF2(outAF2 d,inAF2 a){for(int i=0;i<2;i++)d[i]=-a[i];return d;} | ||
| 460 | A_STATIC retAF3 opANegF3(outAF3 d,inAF3 a){for(int i=0;i<3;i++)d[i]=-a[i];return d;} | ||
| 461 | A_STATIC retAF4 opANegF4(outAF4 d,inAF4 a){for(int i=0;i<4;i++)d[i]=-a[i];return d;} | ||
| 462 | //============================================================================================================================== | ||
// opARcpD2/D3/D4: d[i]=ARcpD1(a[i]) per component; returns d.
| 463 | A_STATIC retAD2 opARcpD2(outAD2 d,inAD2 a){for(int i=0;i<2;i++)d[i]=ARcpD1(a[i]);return d;} | ||
| 464 | A_STATIC retAD3 opARcpD3(outAD3 d,inAD3 a){for(int i=0;i<3;i++)d[i]=ARcpD1(a[i]);return d;} | ||
| 465 | A_STATIC retAD4 opARcpD4(outAD4 d,inAD4 a){for(int i=0;i<4;i++)d[i]=ARcpD1(a[i]);return d;} | ||
| 466 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// opARcpF2/F3/F4: d[i]=ARcpF1(a[i]) per component; returns d.
| 467 | A_STATIC retAF2 opARcpF2(outAF2 d,inAF2 a){for(int i=0;i<2;i++)d[i]=ARcpF1(a[i]);return d;} | ||
| 468 | A_STATIC retAF3 opARcpF3(outAF3 d,inAF3 a){for(int i=0;i<3;i++)d[i]=ARcpF1(a[i]);return d;} | ||
| 469 | A_STATIC retAF4 opARcpF4(outAF4 d,inAF4 a){for(int i=0;i<4;i++)d[i]=ARcpF1(a[i]);return d;} | ||
| 470 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 471 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 472 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 473 | //============================================================================================================================== | ||
| 474 | // HALF FLOAT PACKING | ||
| 475 | //============================================================================================================================== | ||
| 476 | // Convert float to half (in lower 16-bits of output). | ||
| 477 | // Same fast technique as documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf | ||
| 478 | // Supports denormals. | ||
| 479 | // Conversion rules are to make computations possibly "safer" on the GPU, | ||
| 480 | // -INF & -NaN -> -65504 | ||
| 481 | // +INF & +NaN -> +65504 | ||
// AU1_AH1_AF1: table-driven conversion of f to half-float bits, returned as an AU1.
// Both tables are indexed by i=u>>23, where u is f's bit pattern (sign bit plus 8 exponent bits,
// assuming AF1/AU1 are 32-bit -- their definitions are outside this view; TODO confirm).
| 482 | A_STATIC AU1 AU1_AH1_AF1(AF1 f){ | ||
// base[i]: half sign/exponent bits for index i. Entries saturate at 0x7bff (positive) and 0xfbff (negative).
| 483 | static AW1 base[512]={ | ||
| 484 | 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, | ||
| 485 | 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, | ||
| 486 | 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, | ||
| 487 | 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, | ||
| 488 | 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, | ||
| 489 | 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, | ||
| 490 | 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080,0x0100, | ||
| 491 | 0x0200,0x0400,0x0800,0x0c00,0x1000,0x1400,0x1800,0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00, | ||
| 492 | 0x4000,0x4400,0x4800,0x4c00,0x5000,0x5400,0x5800,0x5c00,0x6000,0x6400,0x6800,0x6c00,0x7000,0x7400,0x7800,0x7bff, | ||
| 493 | 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, | ||
| 494 | 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, | ||
| 495 | 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, | ||
| 496 | 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, | ||
| 497 | 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, | ||
| 498 | 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, | ||
| 499 | 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, | ||
| 500 | 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, | ||
| 501 | 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, | ||
| 502 | 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, | ||
| 503 | 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, | ||
| 504 | 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, | ||
| 505 | 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, | ||
| 506 | 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8001,0x8002,0x8004,0x8008,0x8010,0x8020,0x8040,0x8080,0x8100, | ||
| 507 | 0x8200,0x8400,0x8800,0x8c00,0x9000,0x9400,0x9800,0x9c00,0xa000,0xa400,0xa800,0xac00,0xb000,0xb400,0xb800,0xbc00, | ||
| 508 | 0xc000,0xc400,0xc800,0xcc00,0xd000,0xd400,0xd800,0xdc00,0xe000,0xe400,0xe800,0xec00,0xf000,0xf400,0xf800,0xfbff, | ||
| 509 | 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, | ||
| 510 | 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, | ||
| 511 | 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, | ||
| 512 | 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, | ||
| 513 | 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, | ||
| 514 | 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, | ||
| 515 | 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff}; | ||
// shift[i]: right shift applied to the 23-bit mantissa field (u&0x7fffff) for index i.
// 0x18 (24) shifts the whole field out; 0x0d (13) keeps its top 10 bits; values in between cover the ramp
// where base[] holds 0x0001..0x0200.
| 516 | static AB1 shift[512]={ | ||
| 517 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 518 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 519 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 520 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 521 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 522 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 523 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f, | ||
| 524 | 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d, | ||
| 525 | 0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18, | ||
| 526 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 527 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 528 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 529 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 530 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 531 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 532 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 533 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 534 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 535 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 536 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 537 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 538 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 539 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f, | ||
| 540 | 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d, | ||
| 541 | 0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18, | ||
| 542 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 543 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 544 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 545 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 546 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 547 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, | ||
| 548 | 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18}; | ||
// Read f's bit pattern through a union, look both tables up with i=u>>23, and add the shifted mantissa to base[i].
| 549 | union{AF1 f;AU1 u;}bits;bits.f=f;AU1 u=bits.u;AU1 i=u>>23;return (AU1)(base[i])+((u&0x7fffff)>>shift[i]);} | ||
| 550 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 551 | // Used to output packed constant. | ||
// AU1_AH2_AF2: converts a[0] and a[1] with AU1_AH1_AF1 and returns a[0]'s result plus a[1]'s result shifted left by 16.
| 552 | A_STATIC AU1 AU1_AH2_AF2(inAF2 a){AU1 lo=AU1_AH1_AF1(a[0]);AU1 hi=AU1_AH1_AF1(a[1]);return lo+(hi<<16);} | ||
| 553 | #endif | ||
| 554 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 555 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 556 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 557 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 558 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 559 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 560 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 561 | //============================================================================================================================== | ||
| 562 | // | ||
| 563 | // | ||
| 564 | // GLSL | ||
| 565 | // | ||
| 566 | // | ||
| 567 | //============================================================================================================================== | ||
| 568 | #if defined(A_GLSL) && defined(A_GPU) | ||
// GLSL extension requirements. The whole group is skipped when A_SKIP_EXT is defined;
// each sub-group is emitted only when its feature macro (A_HALF, A_LONG, A_WAVE) is defined.
| 569 | #ifndef A_SKIP_EXT | ||
// A_HALF: 16-bit storage plus explicit arithmetic types.
| 570 | #ifdef A_HALF | ||
| 571 | #extension GL_EXT_shader_16bit_storage:require | ||
| 572 | #extension GL_EXT_shader_explicit_arithmetic_types:require | ||
| 573 | #endif | ||
| 574 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// A_LONG: 64-bit integers and the NV 64-bit atomics extension.
| 575 | #ifdef A_LONG | ||
| 576 | #extension GL_ARB_gpu_shader_int64:require | ||
| 577 | #extension GL_NV_shader_atomic_int64:require | ||
| 578 | #endif | ||
| 579 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// A_WAVE: subgroup arithmetic, ballot, quad and shuffle operations.
| 580 | #ifdef A_WAVE | ||
| 581 | #extension GL_KHR_shader_subgroup_arithmetic:require | ||
| 582 | #extension GL_KHR_shader_subgroup_ballot:require | ||
| 583 | #extension GL_KHR_shader_subgroup_quad:require | ||
| 584 | #extension GL_KHR_shader_subgroup_shuffle:require | ||
| 585 | #endif | ||
| 586 | #endif | ||
| 587 | //============================================================================================================================== | ||
// GLSL spellings of the portable type names. The trailing digit is the component count:
// AP* -> bool/bvecN, AF* -> float/vecN, AU* -> uint/uvecN, ASU* -> int/ivecN.
| 588 | #define AP1 bool | ||
| 589 | #define AP2 bvec2 | ||
| 590 | #define AP3 bvec3 | ||
| 591 | #define AP4 bvec4 | ||
| 592 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 593 | #define AF1 float | ||
| 594 | #define AF2 vec2 | ||
| 595 | #define AF3 vec3 | ||
| 596 | #define AF4 vec4 | ||
| 597 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 598 | #define AU1 uint | ||
| 599 | #define AU2 uvec2 | ||
| 600 | #define AU3 uvec3 | ||
| 601 | #define AU4 uvec4 | ||
| 602 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 603 | #define ASU1 int | ||
| 604 | #define ASU2 ivec2 | ||
| 605 | #define ASU3 ivec3 | ||
| 606 | #define ASU4 ivec4 | ||
| 607 | //============================================================================================================================== | ||
// AFn_AUn(x): constructs AUn from x, then passes it to uintBitsToFloat.
| 608 | #define AF1_AU1(x) uintBitsToFloat(AU1(x)) | ||
| 609 | #define AF2_AU2(x) uintBitsToFloat(AU2(x)) | ||
| 610 | #define AF3_AU3(x) uintBitsToFloat(AU3(x)) | ||
| 611 | #define AF4_AU4(x) uintBitsToFloat(AU4(x)) | ||
| 612 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// AUn_AFn(x): constructs AFn from x, then passes it to floatBitsToUint.
| 613 | #define AU1_AF1(x) floatBitsToUint(AF1(x)) | ||
| 614 | #define AU2_AF2(x) floatBitsToUint(AF2(x)) | ||
| 615 | #define AU3_AF3(x) floatBitsToUint(AF3(x)) | ||
| 616 | #define AU4_AF4(x) floatBitsToUint(AF4(x)) | ||
| 617 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// AU1_AH1_AF1_x: passes (a,0.0) to packHalf2x16. The AU1_AH1_AF1 macro constructs AF1 from its argument first.
| 618 | AU1 AU1_AH1_AF1_x(AF1 a){AF2 pair=AF2(a,0.0);return packHalf2x16(pair);} | ||
| 619 | #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a)) | ||
| 620 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// Direct aliases for the GLSL pack* built-ins.
// NOTE(review): names appear to read <result type>_<packed format>_<source type> -- confirm against the rest of the file.
| 621 | #define AU1_AH2_AF2 packHalf2x16 | ||
| 622 | #define AU1_AW2Unorm_AF2 packUnorm2x16 | ||
| 623 | #define AU1_AB4Unorm_AF4 packUnorm4x8 | ||
| 624 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// Direct aliases for the matching unpack* built-ins.
| 625 | #define AF2_AH2_AU1 unpackHalf2x16 | ||
| 626 | #define AF2_AW2Unorm_AU1 unpackUnorm2x16 | ||
| 627 | #define AF4_AB4Unorm_AU1 unpackUnorm4x8 | ||
| 628 | //============================================================================================================================== | ||
// AFn_x: replicates one AF1 into every component of an AFn. The AFn_ macros construct AF1 from their argument first.
| 629 | AF1 AF1_x(AF1 a){return a;} | ||
| 630 | AF2 AF2_x(AF1 a){return AF2(a);} | ||
| 631 | AF3 AF3_x(AF1 a){return AF3(a);} | ||
| 632 | AF4 AF4_x(AF1 a){return AF4(a);} | ||
| 633 | #define AF1_(a) AF1_x(AF1(a)) | ||
| 634 | #define AF2_(a) AF2_x(AF1(a)) | ||
| 635 | #define AF3_(a) AF3_x(AF1(a)) | ||
| 636 | #define AF4_(a) AF4_x(AF1(a)) | ||
| 637 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// AUn_x: replicates one AU1 into every component of an AUn. The AUn_ macros construct AU1 from their argument first.
| 638 | AU1 AU1_x(AU1 a){return a;} | ||
| 639 | AU2 AU2_x(AU1 a){return AU2(a);} | ||
| 640 | AU3 AU3_x(AU1 a){return AU3(a);} | ||
| 641 | AU4 AU4_x(AU1 a){return AU4(a);} | ||
| 642 | #define AU1_(a) AU1_x(AU1(a)) | ||
| 643 | #define AU2_(a) AU2_x(AU1(a)) | ||
| 644 | #define AU3_(a) AU3_x(AU1(a)) | ||
| 645 | #define AU4_(a) AU4_x(AU1(a)) | ||
| 646 | //============================================================================================================================== | ||
// AAbsSUn: converts a to the signed type, takes abs(), and converts the result back to the unsigned type.
| 647 | AU1 AAbsSU1(AU1 a){ASU1 s=ASU1(a);return AU1(abs(s));} | ||
| 648 | AU2 AAbsSU2(AU2 a){ASU2 s=ASU2(a);return AU2(abs(s));} | ||
| 649 | AU3 AAbsSU3(AU3 a){ASU3 s=ASU3(a);return AU3(abs(s));} | ||
| 650 | AU4 AAbsSU4(AU4 a){ASU4 s=ASU4(a);return AU4(abs(s));} | ||
| 651 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// ABfe: bitfieldExtract of 'bits' bits of src starting at bit 'off'.
// ABfi: takes bits from ins where mask is set and from src where it is clear.
// ABfiM: bitfieldInsert of the low 'bits' bits of ins into src at offset 0.
| 652 | AU1 ABfe(AU1 src,AU1 off,AU1 bits){ASU1 o=ASU1(off);ASU1 n=ASU1(bits);return bitfieldExtract(src,o,n);} | ||
| 653 | AU1 ABfi(AU1 src,AU1 ins,AU1 mask){AU1 kept=src&(~mask);AU1 put=ins&mask;return put|kept;} | ||
| 654 | // Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<<bits)-1', and 'bits' needs to be an immediate. | ||
| 655 | AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){ASU1 n=ASU1(bits);return bitfieldInsert(src,ins,0,n);} | ||
| 656 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 657 | // V_MED3_F32. | ||
// AClampFn: returns clamp(x,n,m), i.e. x limited to the range [n,m] per component.
| 658 | AF1 AClampF1(AF1 x,AF1 n,AF1 m){AF1 r=clamp(x,n,m);return r;} | ||
| 659 | AF2 AClampF2(AF2 x,AF2 n,AF2 m){AF2 r=clamp(x,n,m);return r;} | ||
| 660 | AF3 AClampF3(AF3 x,AF3 n,AF3 m){AF3 r=clamp(x,n,m);return r;} | ||
| 661 | AF4 AClampF4(AF4 x,AF4 n,AF4 m){AF4 r=clamp(x,n,m);return r;} | ||
| 662 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 663 | // V_FRACT_F32 (note DX frac() is different). | ||
// AFractFn: returns fract(x) per component.
| 664 | AF1 AFractF1(AF1 x){AF1 r=fract(x);return r;} | ||
| 665 | AF2 AFractF2(AF2 x){AF2 r=fract(x);return r;} | ||
| 666 | AF3 AFractF3(AF3 x){AF3 r=fract(x);return r;} | ||
| 667 | AF4 AFractF4(AF4 x){AF4 r=fract(x);return r;} | ||
| 668 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// ALerpFn: returns mix(x,y,a), with one blend factor per component.
| 669 | AF1 ALerpF1(AF1 x,AF1 y,AF1 a){AF1 r=mix(x,y,a);return r;} | ||
| 670 | AF2 ALerpF2(AF2 x,AF2 y,AF2 a){AF2 r=mix(x,y,a);return r;} | ||
| 671 | AF3 ALerpF3(AF3 x,AF3 y,AF3 a){AF3 r=mix(x,y,a);return r;} | ||
| 672 | AF4 ALerpF4(AF4 x,AF4 y,AF4 a){AF4 r=mix(x,y,a);return r;} | ||
| 673 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 674 | // V_MAX3_F32. | ||
// AMax3Fn: largest of x, y and z per component, computed as max(x,max(y,z)).
| 675 | AF1 AMax3F1(AF1 x,AF1 y,AF1 z){AF1 yz=max(y,z);return max(x,yz);} | ||
| 676 | AF2 AMax3F2(AF2 x,AF2 y,AF2 z){AF2 yz=max(y,z);return max(x,yz);} | ||
| 677 | AF3 AMax3F3(AF3 x,AF3 y,AF3 z){AF3 yz=max(y,z);return max(x,yz);} | ||
| 678 | AF4 AMax3F4(AF4 x,AF4 y,AF4 z){AF4 yz=max(y,z);return max(x,yz);} | ||
| 679 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// AMax3SUn: three-way max on the inputs converted to signed; the result is converted back to unsigned.
| 680 | AU1 AMax3SU1(AU1 x,AU1 y,AU1 z){ASU1 sx=ASU1(x);ASU1 sy=ASU1(y);ASU1 sz=ASU1(z);return AU1(max(sx,max(sy,sz)));} | ||
| 681 | AU2 AMax3SU2(AU2 x,AU2 y,AU2 z){ASU2 sx=ASU2(x);ASU2 sy=ASU2(y);ASU2 sz=ASU2(z);return AU2(max(sx,max(sy,sz)));} | ||
| 682 | AU3 AMax3SU3(AU3 x,AU3 y,AU3 z){ASU3 sx=ASU3(x);ASU3 sy=ASU3(y);ASU3 sz=ASU3(z);return AU3(max(sx,max(sy,sz)));} | ||
| 683 | AU4 AMax3SU4(AU4 x,AU4 y,AU4 z){ASU4 sx=ASU4(x);ASU4 sy=ASU4(y);ASU4 sz=ASU4(z);return AU4(max(sx,max(sy,sz)));} | ||
| 684 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// AMax3Un: largest of x, y and z per component, compared as unsigned.
| 685 | AU1 AMax3U1(AU1 x,AU1 y,AU1 z){AU1 yz=max(y,z);return max(x,yz);} | ||
| 686 | AU2 AMax3U2(AU2 x,AU2 y,AU2 z){AU2 yz=max(y,z);return max(x,yz);} | ||
| 687 | AU3 AMax3U3(AU3 x,AU3 y,AU3 z){AU3 yz=max(y,z);return max(x,yz);} | ||
| 688 | AU4 AMax3U4(AU4 x,AU4 y,AU4 z){AU4 yz=max(y,z);return max(x,yz);} | ||
| 689 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// AMaxSUn: max of a and b after converting both to signed; the result is converted back to unsigned.
| 690 | AU1 AMaxSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(max(sa,sb));} | ||
| 691 | AU2 AMaxSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(max(sa,sb));} | ||
| 692 | AU3 AMaxSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(max(sa,sb));} | ||
| 693 | AU4 AMaxSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(max(sa,sb));} | ||
| 694 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 695 | // Clamp has an easier pattern match for med3 when some ordering is known. | ||
| 696 | // V_MED3_F32. | ||
// AMed3Fn: median of x, y and z per component: max(min(x,y), min(max(x,y), z)).
| 697 | AF1 AMed3F1(AF1 x,AF1 y,AF1 z){AF1 lo=min(x,y);AF1 hi=max(x,y);return max(lo,min(hi,z));} | ||
| 698 | AF2 AMed3F2(AF2 x,AF2 y,AF2 z){AF2 lo=min(x,y);AF2 hi=max(x,y);return max(lo,min(hi,z));} | ||
| 699 | AF3 AMed3F3(AF3 x,AF3 y,AF3 z){AF3 lo=min(x,y);AF3 hi=max(x,y);return max(lo,min(hi,z));} | ||
| 700 | AF4 AMed3F4(AF4 x,AF4 y,AF4 z){AF4 lo=min(x,y);AF4 hi=max(x,y);return max(lo,min(hi,z));} | ||
| 701 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 702 | // V_MIN3_F32. | ||
// AMin3Fn: smallest of x, y and z per component, computed as min(x,min(y,z)).
| 703 | AF1 AMin3F1(AF1 x,AF1 y,AF1 z){AF1 yz=min(y,z);return min(x,yz);} | ||
| 704 | AF2 AMin3F2(AF2 x,AF2 y,AF2 z){AF2 yz=min(y,z);return min(x,yz);} | ||
| 705 | AF3 AMin3F3(AF3 x,AF3 y,AF3 z){AF3 yz=min(y,z);return min(x,yz);} | ||
| 706 | AF4 AMin3F4(AF4 x,AF4 y,AF4 z){AF4 yz=min(y,z);return min(x,yz);} | ||
| 707 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// AMin3SUn: three-way min on the inputs converted to signed; the result is converted back to unsigned.
| 708 | AU1 AMin3SU1(AU1 x,AU1 y,AU1 z){ASU1 sx=ASU1(x);ASU1 sy=ASU1(y);ASU1 sz=ASU1(z);return AU1(min(sx,min(sy,sz)));} | ||
| 709 | AU2 AMin3SU2(AU2 x,AU2 y,AU2 z){ASU2 sx=ASU2(x);ASU2 sy=ASU2(y);ASU2 sz=ASU2(z);return AU2(min(sx,min(sy,sz)));} | ||
| 710 | AU3 AMin3SU3(AU3 x,AU3 y,AU3 z){ASU3 sx=ASU3(x);ASU3 sy=ASU3(y);ASU3 sz=ASU3(z);return AU3(min(sx,min(sy,sz)));} | ||
| 711 | AU4 AMin3SU4(AU4 x,AU4 y,AU4 z){ASU4 sx=ASU4(x);ASU4 sy=ASU4(y);ASU4 sz=ASU4(z);return AU4(min(sx,min(sy,sz)));} | ||
| 712 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// AMin3Un: smallest of x, y and z per component, compared as unsigned.
| 713 | AU1 AMin3U1(AU1 x,AU1 y,AU1 z){AU1 yz=min(y,z);return min(x,yz);} | ||
| 714 | AU2 AMin3U2(AU2 x,AU2 y,AU2 z){AU2 yz=min(y,z);return min(x,yz);} | ||
| 715 | AU3 AMin3U3(AU3 x,AU3 y,AU3 z){AU3 yz=min(y,z);return min(x,yz);} | ||
| 716 | AU4 AMin3U4(AU4 x,AU4 y,AU4 z){AU4 yz=min(y,z);return min(x,yz);} | ||
| 717 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// AMinSUn: min of a and b after converting both to signed; the result is converted back to unsigned.
| 718 | AU1 AMinSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(min(sa,sb));} | ||
| 719 | AU2 AMinSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(min(sa,sb));} | ||
| 720 | AU3 AMinSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(min(sa,sb));} | ||
| 721 | AU4 AMinSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(min(sa,sb));} | ||
| 722 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 723 | // Normalized trig. Valid input domain is {-256 to +256}. No GLSL compiler intrinsic exists to map to this currently. | ||
| 724 | // V_COS_F32. | ||
// ANCosFn: cosine of x scaled by A_2PI (x is multiplied by A_2PI before calling cos).
| 725 | AF1 ANCosF1(AF1 x){AF1 t=x*AF1_(A_2PI);return cos(t);} | ||
| 726 | AF2 ANCosF2(AF2 x){AF2 t=x*AF2_(A_2PI);return cos(t);} | ||
| 727 | AF3 ANCosF3(AF3 x){AF3 t=x*AF3_(A_2PI);return cos(t);} | ||
| 728 | AF4 ANCosF4(AF4 x){AF4 t=x*AF4_(A_2PI);return cos(t);} | ||
| 729 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 730 | // Normalized trig. Valid input domain is {-256 to +256}. No GLSL compiler intrinsic exists to map to this currently. | ||
| 731 | // V_SIN_F32. | ||
// ANSinFn: sine of x scaled by A_2PI (x is multiplied by A_2PI before calling sin).
| 732 | AF1 ANSinF1(AF1 x){AF1 t=x*AF1_(A_2PI);return sin(t);} | ||
| 733 | AF2 ANSinF2(AF2 x){AF2 t=x*AF2_(A_2PI);return sin(t);} | ||
| 734 | AF3 ANSinF3(AF3 x){AF3 t=x*AF3_(A_2PI);return sin(t);} | ||
| 735 | AF4 ANSinF4(AF4 x){AF4 t=x*AF4_(A_2PI);return sin(t);} | ||
| 736 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// ARcpFn: reciprocal per component, computed as 1.0/x.
| 737 | AF1 ARcpF1(AF1 x){AF1 one=AF1_(1.0);return one/x;} | ||
| 738 | AF2 ARcpF2(AF2 x){AF2 one=AF2_(1.0);return one/x;} | ||
| 739 | AF3 ARcpF3(AF3 x){AF3 one=AF3_(1.0);return one/x;} | ||
| 740 | AF4 ARcpF4(AF4 x){AF4 one=AF4_(1.0);return one/x;} | ||
| 741 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// ARsqFn: reciprocal square root per component, computed as 1.0/sqrt(x).
| 742 | AF1 ARsqF1(AF1 x){AF1 root=sqrt(x);return AF1_(1.0)/root;} | ||
| 743 | AF2 ARsqF2(AF2 x){AF2 root=sqrt(x);return AF2_(1.0)/root;} | ||
| 744 | AF3 ARsqF3(AF3 x){AF3 root=sqrt(x);return AF3_(1.0)/root;} | ||
| 745 | AF4 ARsqF4(AF4 x){AF4 root=sqrt(x);return AF4_(1.0)/root;} | ||
| 746 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// ASatFn: clamps every component of x to the range [0.0,1.0].
| 747 | AF1 ASatF1(AF1 x){AF1 lo=AF1_(0.0);AF1 hi=AF1_(1.0);return clamp(x,lo,hi);} | ||
| 748 | AF2 ASatF2(AF2 x){AF2 lo=AF2_(0.0);AF2 hi=AF2_(1.0);return clamp(x,lo,hi);} | ||
| 749 | AF3 ASatF3(AF3 x){AF3 lo=AF3_(0.0);AF3 hi=AF3_(1.0);return clamp(x,lo,hi);} | ||
| 750 | AF4 ASatF4(AF4 x){AF4 lo=AF4_(0.0);AF4 hi=AF4_(1.0);return clamp(x,lo,hi);} | ||
| 751 | //------------------------------------------------------------------------------------------------------------------------------ | ||
// AShrSUn: right shift performed on the signed conversions of a and b; the result is converted back to unsigned.
| 752 | AU1 AShrSU1(AU1 a,AU1 b){ASU1 s=ASU1(a)>>ASU1(b);return AU1(s);} | ||
| 753 | AU2 AShrSU2(AU2 a,AU2 b){ASU2 s=ASU2(a)>>ASU2(b);return AU2(s);} | ||
| 754 | AU3 AShrSU3(AU3 a,AU3 b){ASU3 s=ASU3(a)>>ASU3(b);return AU3(s);} | ||
| 755 | AU4 AShrSU4(AU4 a,AU4 b){ASU4 s=ASU4(a)>>ASU4(b);return AU4(s);} | ||
| 756 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 757 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 758 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 759 | //============================================================================================================================== | ||
| 760 | // GLSL BYTE | ||
| 761 | //============================================================================================================================== | ||
| 762 | #ifdef A_BYTE | ||
| 763 | #define AB1 uint8_t /* AB1..AB4: unsigned 8-bit scalar and 2/3/4-component vector type aliases. */ | ||
| 764 | #define AB2 u8vec2 | ||
| 765 | #define AB3 u8vec3 | ||
| 766 | #define AB4 u8vec4 | ||
| 767 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 768 | #define ASB1 int8_t /* ASB1..ASB4: signed 8-bit scalar and 2/3/4-component vector type aliases. */ | ||
| 769 | #define ASB2 i8vec2 | ||
| 770 | #define ASB3 i8vec3 | ||
| 771 | #define ASB4 i8vec4 | ||
| 772 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 773 | AB1 AB1_x(AB1 v){AB1 r=AB1(v);return r;} // AB*_x: replicate one byte value into every component of the result. | ||
| 774 | AB2 AB2_x(AB1 v){AB2 r=AB2(v,v);return r;} | ||
| 775 | AB3 AB3_x(AB1 v){AB3 r=AB3(v,v,v);return r;} | ||
| 776 | AB4 AB4_x(AB1 v){AB4 r=AB4(v,v,v,v);return r;} | ||
| 777 | #define AB1_(v) AB1_x(AB1(v)) // AB*_(): convert the argument to AB1 first, then splat. | ||
| 778 | #define AB2_(v) AB2_x(AB1(v)) | ||
| 779 | #define AB3_(v) AB3_x(AB1(v)) | ||
| 780 | #define AB4_(v) AB4_x(AB1(v)) | ||
| 781 | #endif | ||
| 782 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 783 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 784 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 785 | //============================================================================================================================== | ||
| 786 | // GLSL HALF | ||
| 787 | //============================================================================================================================== | ||
| 788 | #ifdef A_HALF | ||
| 789 | #define AH1 float16_t /* AH1..AH4: 16-bit float scalar and 2/3/4-component vector type aliases. */ | ||
| 790 | #define AH2 f16vec2 | ||
| 791 | #define AH3 f16vec3 | ||
| 792 | #define AH4 f16vec4 | ||
| 793 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 794 | #define AW1 uint16_t /* AW1..AW4: unsigned 16-bit scalar and 2/3/4-component vector type aliases. */ | ||
| 795 | #define AW2 u16vec2 | ||
| 796 | #define AW3 u16vec3 | ||
| 797 | #define AW4 u16vec4 | ||
| 798 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 799 | #define ASW1 int16_t /* ASW1..ASW4: signed 16-bit scalar and 2/3/4-component vector type aliases. */ | ||
| 800 | #define ASW2 i16vec2 | ||
| 801 | #define ASW3 i16vec3 | ||
| 802 | #define ASW4 i16vec4 | ||
| 803 | //============================================================================================================================== | ||
| 804 | #define AH2_AU1(x) unpackFloat2x16(AU1(x)) // Unpack helpers: 32-bit words (AU*) to pairs of 16-bit values (AH*/AW*). | ||
| 805 | AH4 AH4_AU2_x(AU2 x){AH2 lo=unpackFloat2x16(x.x);AH2 hi=unpackFloat2x16(x.y);return AH4(lo,hi);} | ||
| 806 | #define AH4_AU2(x) AH4_AU2_x(AU2(x)) | ||
| 807 | #define AW2_AU1(x) unpackUint2x16(AU1(x)) | ||
| 808 | #define AW4_AU2(x) unpackUint4x16(pack64(AU2(x))) | ||
| 809 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 810 | #define AU1_AH2(x) packFloat2x16(AH2(x)) // Pack helpers: pairs of 16-bit values (AH*/AW*) to 32-bit words (AU*). | ||
| 811 | AU2 AU2_AH4_x(AH4 x){AU1 lo=packFloat2x16(x.xy);AU1 hi=packFloat2x16(x.zw);return AU2(lo,hi);} | ||
| 812 | #define AU2_AH4(x) AU2_AH4_x(AH4(x)) | ||
| 813 | #define AU1_AW2(x) packUint2x16(AW2(x)) | ||
| 814 | #define AU2_AW4(x) unpack32(packUint4x16(AW4(x))) | ||
| 815 | //============================================================================================================================== | ||
| 816 | #define AW1_AH1(x) halfBitsToUint16(AH1(x)) /* AW*_AH*: convert the argument with AH*() and pass it to halfBitsToUint16 (half to 16-bit uint bits, per the builtin's name). */ | ||
| 817 | #define AW2_AH2(x) halfBitsToUint16(AH2(x)) | ||
| 818 | #define AW3_AH3(x) halfBitsToUint16(AH3(x)) | ||
| 819 | #define AW4_AH4(x) halfBitsToUint16(AH4(x)) | ||
| 820 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 821 | #define AH1_AW1(x) uint16BitsToHalf(AW1(x)) /* AH*_AW*: convert the argument with AW*() and pass it to uint16BitsToHalf (16-bit uint bits to half, per the builtin's name). */ | ||
| 822 | #define AH2_AW2(x) uint16BitsToHalf(AW2(x)) | ||
| 823 | #define AH3_AW3(x) uint16BitsToHalf(AW3(x)) | ||
| 824 | #define AH4_AW4(x) uint16BitsToHalf(AW4(x)) | ||
| 825 | //============================================================================================================================== | ||
| 826 | AH1 AH1_x(AH1 v){AH1 r=AH1(v);return r;} // AH*_x: replicate one half value into every component of the result. | ||
| 827 | AH2 AH2_x(AH1 v){AH2 r=AH2(v,v);return r;} | ||
| 828 | AH3 AH3_x(AH1 v){AH3 r=AH3(v,v,v);return r;} | ||
| 829 | AH4 AH4_x(AH1 v){AH4 r=AH4(v,v,v,v);return r;} | ||
| 830 | #define AH1_(v) AH1_x(AH1(v)) // AH*_(): convert the argument to AH1 first, then splat. | ||
| 831 | #define AH2_(v) AH2_x(AH1(v)) | ||
| 832 | #define AH3_(v) AH3_x(AH1(v)) | ||
| 833 | #define AH4_(v) AH4_x(AH1(v)) | ||
| 834 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 835 | AW1 AW1_x(AW1 v){AW1 r=AW1(v);return r;} // AW*_x: replicate one 16-bit value into every component of the result. | ||
| 836 | AW2 AW2_x(AW1 v){AW2 r=AW2(v,v);return r;} | ||
| 837 | AW3 AW3_x(AW1 v){AW3 r=AW3(v,v,v);return r;} | ||
| 838 | AW4 AW4_x(AW1 v){AW4 r=AW4(v,v,v,v);return r;} | ||
| 839 | #define AW1_(v) AW1_x(AW1(v)) // AW*_(): convert the argument to AW1 first, then splat. | ||
| 840 | #define AW2_(v) AW2_x(AW1(v)) | ||
| 841 | #define AW3_(v) AW3_x(AW1(v)) | ||
| 842 | #define AW4_(v) AW4_x(AW1(v)) | ||
| 843 | //============================================================================================================================== | ||
| 844 | AW1 AAbsSW1(AW1 a){ASW1 s=ASW1(a);return AW1(abs(s));} // abs() of the signed 16-bit (ASW*) view, returned as unsigned. | ||
| 845 | AW2 AAbsSW2(AW2 a){ASW2 s=ASW2(a);return AW2(abs(s));} | ||
| 846 | AW3 AAbsSW3(AW3 a){ASW3 s=ASW3(a);return AW3(abs(s));} | ||
| 847 | AW4 AAbsSW4(AW4 a){ASW4 s=ASW4(a);return AW4(abs(s));} | ||
| 848 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 849 | AH1 AClampH1(AH1 x,AH1 n,AH1 m){AH1 r=clamp(x,n,m);return r;} // clamp() of x with n as the lower and m as the upper bound. | ||
| 850 | AH2 AClampH2(AH2 x,AH2 n,AH2 m){AH2 r=clamp(x,n,m);return r;} | ||
| 851 | AH3 AClampH3(AH3 x,AH3 n,AH3 m){AH3 r=clamp(x,n,m);return r;} | ||
| 852 | AH4 AClampH4(AH4 x,AH4 n,AH4 m){AH4 r=clamp(x,n,m);return r;} | ||
| 853 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 854 | AH1 AFractH1(AH1 x){AH1 f=fract(x);return f;} // Thin wrapper over the fract() builtin. | ||
| 855 | AH2 AFractH2(AH2 x){AH2 f=fract(x);return f;} | ||
| 856 | AH3 AFractH3(AH3 x){AH3 f=fract(x);return f;} | ||
| 857 | AH4 AFractH4(AH4 x){AH4 f=fract(x);return f;} | ||
| 858 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 859 | AH1 ALerpH1(AH1 x,AH1 y,AH1 a){AH1 r=mix(x,y,a);return r;} // Thin wrapper over the mix() builtin with weight a. | ||
| 860 | AH2 ALerpH2(AH2 x,AH2 y,AH2 a){AH2 r=mix(x,y,a);return r;} | ||
| 861 | AH3 ALerpH3(AH3 x,AH3 y,AH3 a){AH3 r=mix(x,y,a);return r;} | ||
| 862 | AH4 ALerpH4(AH4 x,AH4 y,AH4 a){AH4 r=mix(x,y,a);return r;} | ||
| 863 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 864 | // Three-input maximum built from two max() calls. No packed version of max3. | ||
| 865 | AH1 AMax3H1(AH1 x,AH1 y,AH1 z){AH1 yz=max(y,z);return max(x,yz);} | ||
| 866 | AH2 AMax3H2(AH2 x,AH2 y,AH2 z){AH2 yz=max(y,z);return max(x,yz);} | ||
| 867 | AH3 AMax3H3(AH3 x,AH3 y,AH3 z){AH3 yz=max(y,z);return max(x,yz);} | ||
| 868 | AH4 AMax3H4(AH4 x,AH4 y,AH4 z){AH4 yz=max(y,z);return max(x,yz);} | ||
| 869 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 870 | AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASW1(a),ASW1(b)));} // Signed max of 16-bit values held in unsigned AW*: compare as ASW* (int16) so the sign bit is honored; the 32-bit ASU* view widens the unsigned value and never sees it as negative. | ||
| 871 | AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASW2(a),ASW2(b)));} | ||
| 872 | AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASW3(a),ASW3(b)));} | ||
| 873 | AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASW4(a),ASW4(b)));} | ||
| 874 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 875 | // Three-input minimum built from two min() calls. No packed version of min3. | ||
| 876 | AH1 AMin3H1(AH1 x,AH1 y,AH1 z){AH1 yz=min(y,z);return min(x,yz);} | ||
| 877 | AH2 AMin3H2(AH2 x,AH2 y,AH2 z){AH2 yz=min(y,z);return min(x,yz);} | ||
| 878 | AH3 AMin3H3(AH3 x,AH3 y,AH3 z){AH3 yz=min(y,z);return min(x,yz);} | ||
| 879 | AH4 AMin3H4(AH4 x,AH4 y,AH4 z){AH4 yz=min(y,z);return min(x,yz);} | ||
| 880 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 881 | AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASW1(a),ASW1(b)));} // Signed min of 16-bit values held in unsigned AW*: compare as ASW* (int16) so the sign bit is honored; the 32-bit ASU* view widens the unsigned value and never sees it as negative. | ||
| 882 | AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASW2(a),ASW2(b)));} | ||
| 883 | AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASW3(a),ASW3(b)));} | ||
| 884 | AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASW4(a),ASW4(b)));} | ||
| 885 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 886 | AH1 ARcpH1(AH1 x){AH1 one=AH1_(1.0);return one/x;} // Reciprocal, written as an explicit 1.0/x divide. | ||
| 887 | AH2 ARcpH2(AH2 x){AH2 one=AH2_(1.0);return one/x;} | ||
| 888 | AH3 ARcpH3(AH3 x){AH3 one=AH3_(1.0);return one/x;} | ||
| 889 | AH4 ARcpH4(AH4 x){AH4 one=AH4_(1.0);return one/x;} | ||
| 890 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 891 | AH1 ARsqH1(AH1 x){AH1 root=sqrt(x);return AH1_(1.0)/root;} // Reciprocal square root: 1.0 divided by sqrt(x). | ||
| 892 | AH2 ARsqH2(AH2 x){AH2 root=sqrt(x);return AH2_(1.0)/root;} | ||
| 893 | AH3 ARsqH3(AH3 x){AH3 root=sqrt(x);return AH3_(1.0)/root;} | ||
| 894 | AH4 ARsqH4(AH4 x){AH4 root=sqrt(x);return AH4_(1.0)/root;} | ||
| 895 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 896 | AH1 ASatH1(AH1 x){AH1 lo=AH1_(0.0);AH1 hi=AH1_(1.0);return clamp(x,lo,hi);} // Saturate: clamp each component to [0,1]. | ||
| 897 | AH2 ASatH2(AH2 x){AH2 lo=AH2_(0.0);AH2 hi=AH2_(1.0);return clamp(x,lo,hi);} | ||
| 898 | AH3 ASatH3(AH3 x){AH3 lo=AH3_(0.0);AH3 hi=AH3_(1.0);return clamp(x,lo,hi);} | ||
| 899 | AH4 ASatH4(AH4 x){AH4 lo=AH4_(0.0);AH4 hi=AH4_(1.0);return clamp(x,lo,hi);} | ||
| 900 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 901 | AW1 AShrSW1(AW1 a,AW1 b){ASW1 sa=ASW1(a);ASW1 sb=ASW1(b);return AW1(sa>>sb);} // Right shift done on the signed 16-bit (ASW*) view of both operands. | ||
| 902 | AW2 AShrSW2(AW2 a,AW2 b){ASW2 sa=ASW2(a);ASW2 sb=ASW2(b);return AW2(sa>>sb);} | ||
| 903 | AW3 AShrSW3(AW3 a,AW3 b){ASW3 sa=ASW3(a);ASW3 sb=ASW3(b);return AW3(sa>>sb);} | ||
| 904 | AW4 AShrSW4(AW4 a,AW4 b){ASW4 sa=ASW4(a);ASW4 sb=ASW4(b);return AW4(sa>>sb);} | ||
| 905 | #endif | ||
| 906 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 907 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 908 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 909 | //============================================================================================================================== | ||
| 910 | // GLSL DOUBLE | ||
| 911 | //============================================================================================================================== | ||
| 912 | #ifdef A_DUBL | ||
| 913 | #define AD1 double /* AD1..AD4: double-precision scalar and 2/3/4-component vector type aliases. */ | ||
| 914 | #define AD2 dvec2 | ||
| 915 | #define AD3 dvec3 | ||
| 916 | #define AD4 dvec4 | ||
| 917 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 918 | AD1 AD1_x(AD1 v){AD1 r=AD1(v);return r;} // AD*_x: replicate one double value into every component of the result. | ||
| 919 | AD2 AD2_x(AD1 v){AD2 r=AD2(v,v);return r;} | ||
| 920 | AD3 AD3_x(AD1 v){AD3 r=AD3(v,v,v);return r;} | ||
| 921 | AD4 AD4_x(AD1 v){AD4 r=AD4(v,v,v,v);return r;} | ||
| 922 | #define AD1_(v) AD1_x(AD1(v)) // AD*_(): convert the argument to AD1 first, then splat. | ||
| 923 | #define AD2_(v) AD2_x(AD1(v)) | ||
| 924 | #define AD3_(v) AD3_x(AD1(v)) | ||
| 925 | #define AD4_(v) AD4_x(AD1(v)) | ||
| 926 | //============================================================================================================================== | ||
| 927 | AD1 AFractD1(AD1 x){AD1 f=fract(x);return f;} // Thin wrapper over the fract() builtin. | ||
| 928 | AD2 AFractD2(AD2 x){AD2 f=fract(x);return f;} | ||
| 929 | AD3 AFractD3(AD3 x){AD3 f=fract(x);return f;} | ||
| 930 | AD4 AFractD4(AD4 x){AD4 f=fract(x);return f;} | ||
| 931 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 932 | AD1 ALerpD1(AD1 x,AD1 y,AD1 a){AD1 r=mix(x,y,a);return r;} // Thin wrapper over the mix() builtin with weight a. | ||
| 933 | AD2 ALerpD2(AD2 x,AD2 y,AD2 a){AD2 r=mix(x,y,a);return r;} | ||
| 934 | AD3 ALerpD3(AD3 x,AD3 y,AD3 a){AD3 r=mix(x,y,a);return r;} | ||
| 935 | AD4 ALerpD4(AD4 x,AD4 y,AD4 a){AD4 r=mix(x,y,a);return r;} | ||
| 936 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 937 | AD1 ARcpD1(AD1 x){AD1 one=AD1_(1.0);return one/x;} // Reciprocal, written as an explicit 1.0/x divide. | ||
| 938 | AD2 ARcpD2(AD2 x){AD2 one=AD2_(1.0);return one/x;} | ||
| 939 | AD3 ARcpD3(AD3 x){AD3 one=AD3_(1.0);return one/x;} | ||
| 940 | AD4 ARcpD4(AD4 x){AD4 one=AD4_(1.0);return one/x;} | ||
| 941 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 942 | AD1 ARsqD1(AD1 x){AD1 root=sqrt(x);return AD1_(1.0)/root;} // Reciprocal square root: 1.0 divided by sqrt(x). | ||
| 943 | AD2 ARsqD2(AD2 x){AD2 root=sqrt(x);return AD2_(1.0)/root;} | ||
| 944 | AD3 ARsqD3(AD3 x){AD3 root=sqrt(x);return AD3_(1.0)/root;} | ||
| 945 | AD4 ARsqD4(AD4 x){AD4 root=sqrt(x);return AD4_(1.0)/root;} | ||
| 946 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 947 | AD1 ASatD1(AD1 x){AD1 lo=AD1_(0.0);AD1 hi=AD1_(1.0);return clamp(x,lo,hi);} // Saturate: clamp each component to [0,1]. | ||
| 948 | AD2 ASatD2(AD2 x){AD2 lo=AD2_(0.0);AD2 hi=AD2_(1.0);return clamp(x,lo,hi);} | ||
| 949 | AD3 ASatD3(AD3 x){AD3 lo=AD3_(0.0);AD3 hi=AD3_(1.0);return clamp(x,lo,hi);} | ||
| 950 | AD4 ASatD4(AD4 x){AD4 lo=AD4_(0.0);AD4 hi=AD4_(1.0);return clamp(x,lo,hi);} | ||
| 951 | #endif | ||
| 952 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 953 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 954 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 955 | //============================================================================================================================== | ||
| 956 | // GLSL LONG | ||
| 957 | //============================================================================================================================== | ||
| 958 | #ifdef A_LONG | ||
| 959 | #define AL1 uint64_t /* AL1..AL4: unsigned 64-bit scalar and 2/3/4-component vector type aliases. */ | ||
| 960 | #define AL2 u64vec2 | ||
| 961 | #define AL3 u64vec3 | ||
| 962 | #define AL4 u64vec4 | ||
| 963 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 964 | #define ASL1 int64_t /* ASL1..ASL4: signed 64-bit scalar and 2/3/4-component vector type aliases. */ | ||
| 965 | #define ASL2 i64vec2 | ||
| 966 | #define ASL3 i64vec3 | ||
| 967 | #define ASL4 i64vec4 | ||
| 968 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 969 | #define AL1_AU2(x) packUint2x32(AU2(x)) /* Two 32-bit words to one 64-bit value via packUint2x32. */ | ||
| 970 | #define AU2_AL1(x) unpackUint2x32(AL1(x)) /* One 64-bit value to two 32-bit words via unpackUint2x32. */ | ||
| 971 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 972 | AL1 AL1_x(AL1 v){AL1 r=AL1(v);return r;} // AL*_x: replicate one 64-bit value into every component of the result. | ||
| 973 | AL2 AL2_x(AL1 v){AL2 r=AL2(v,v);return r;} | ||
| 974 | AL3 AL3_x(AL1 v){AL3 r=AL3(v,v,v);return r;} | ||
| 975 | AL4 AL4_x(AL1 v){AL4 r=AL4(v,v,v,v);return r;} | ||
| 976 | #define AL1_(v) AL1_x(AL1(v)) // AL*_(): convert the argument to AL1 first, then splat. | ||
| 977 | #define AL2_(v) AL2_x(AL1(v)) | ||
| 978 | #define AL3_(v) AL3_x(AL1(v)) | ||
| 979 | #define AL4_(v) AL4_x(AL1(v)) | ||
| 980 | //============================================================================================================================== | ||
| 981 | AL1 AAbsSL1(AL1 a){ASL1 s=ASL1(a);return AL1(abs(s));} // abs() of the signed 64-bit (ASL*) view, returned as unsigned. | ||
| 982 | AL2 AAbsSL2(AL2 a){ASL2 s=ASL2(a);return AL2(abs(s));} | ||
| 983 | AL3 AAbsSL3(AL3 a){ASL3 s=ASL3(a);return AL3(abs(s));} | ||
| 984 | AL4 AAbsSL4(AL4 a){ASL4 s=ASL4(a);return AL4(abs(s));} | ||
| 985 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 986 | AL1 AMaxSL1(AL1 a,AL1 b){return AL1(max(ASL1(a),ASL1(b)));} // Signed max of 64-bit values held in unsigned AL*: compare as ASL* (int64); converting to the 32-bit ASU* types would discard the upper 32 bits before the compare. | ||
| 987 | AL2 AMaxSL2(AL2 a,AL2 b){return AL2(max(ASL2(a),ASL2(b)));} | ||
| 988 | AL3 AMaxSL3(AL3 a,AL3 b){return AL3(max(ASL3(a),ASL3(b)));} | ||
| 989 | AL4 AMaxSL4(AL4 a,AL4 b){return AL4(max(ASL4(a),ASL4(b)));} | ||
| 990 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 991 | AL1 AMinSL1(AL1 a,AL1 b){return AL1(min(ASL1(a),ASL1(b)));} // Signed min of 64-bit values held in unsigned AL*: compare as ASL* (int64); converting to the 32-bit ASU* types would discard the upper 32 bits before the compare. | ||
| 992 | AL2 AMinSL2(AL2 a,AL2 b){return AL2(min(ASL2(a),ASL2(b)));} | ||
| 993 | AL3 AMinSL3(AL3 a,AL3 b){return AL3(min(ASL3(a),ASL3(b)));} | ||
| 994 | AL4 AMinSL4(AL4 a,AL4 b){return AL4(min(ASL4(a),ASL4(b)));} | ||
| 995 | #endif | ||
| 996 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 997 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 998 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 999 | //============================================================================================================================== | ||
| 1000 | // WAVE OPERATIONS | ||
| 1001 | //============================================================================================================================== | ||
| 1002 | #ifdef A_WAVE | ||
| 1003 | // Wrappers over subgroupShuffleXor(v,x), where 'x' must be a compile time literal. | ||
| 1004 | AF1 AWaveXorF1(AF1 v,AU1 x){AF1 r=subgroupShuffleXor(v,x);return r;} | ||
| 1005 | AF2 AWaveXorF2(AF2 v,AU1 x){AF2 r=subgroupShuffleXor(v,x);return r;} | ||
| 1006 | AF3 AWaveXorF3(AF3 v,AU1 x){AF3 r=subgroupShuffleXor(v,x);return r;} | ||
| 1007 | AF4 AWaveXorF4(AF4 v,AU1 x){AF4 r=subgroupShuffleXor(v,x);return r;} | ||
| 1008 | AU1 AWaveXorU1(AU1 v,AU1 x){AU1 r=subgroupShuffleXor(v,x);return r;} | ||
| 1009 | AU2 AWaveXorU2(AU2 v,AU1 x){AU2 r=subgroupShuffleXor(v,x);return r;} | ||
| 1010 | AU3 AWaveXorU3(AU3 v,AU1 x){AU3 r=subgroupShuffleXor(v,x);return r;} | ||
| 1011 | AU4 AWaveXorU4(AU4 v,AU1 x){AU4 r=subgroupShuffleXor(v,x);return r;} | ||
| 1012 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1013 | #ifdef A_HALF | ||
| 1014 | AH2 AWaveXorH2(AH2 v,AU1 x){AU1 p=AU1_AH2(v);return AH2_AU1(subgroupShuffleXor(p,x));} // 16-bit variants: pack to 32-bit words, shuffle, then unpack. | ||
| 1015 | AH4 AWaveXorH4(AH4 v,AU1 x){AU2 p=AU2_AH4(v);return AH4_AU2(subgroupShuffleXor(p,x));} | ||
| 1016 | AW2 AWaveXorW2(AW2 v,AU1 x){AU1 p=AU1_AW2(v);return AW2_AU1(subgroupShuffleXor(p,x));} | ||
| 1017 | AW4 AWaveXorW4(AW4 v,AU1 x){AU2 p=AU2_AW4(v);return AW4_AU2(subgroupShuffleXor(p,x));} | ||
| 1018 | #endif | ||
| 1019 | #endif | ||
| 1020 | //============================================================================================================================== | ||
| 1021 | #endif | ||
| 1022 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1023 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1024 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1025 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1026 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1027 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1028 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1029 | //============================================================================================================================== | ||
| 1030 | // | ||
| 1031 | // | ||
| 1032 | // HLSL | ||
| 1033 | // | ||
| 1034 | // | ||
| 1035 | //============================================================================================================================== | ||
| 1036 | #if defined(A_HLSL) && defined(A_GPU) | ||
| 1037 | #ifdef A_HLSL_6_2 | ||
| 1038 | #define AP1 bool /* AP1..AP4: boolean scalar and 2/3/4-component vector aliases (A_HLSL_6_2 branch). */ | ||
| 1039 | #define AP2 bool2 | ||
| 1040 | #define AP3 bool3 | ||
| 1041 | #define AP4 bool4 | ||
| 1042 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1043 | #define AF1 float32_t /* AF1..AF4: float aliases using the explicitly sized float32_t names (A_HLSL_6_2 branch). */ | ||
| 1044 | #define AF2 float32_t2 | ||
| 1045 | #define AF3 float32_t3 | ||
| 1046 | #define AF4 float32_t4 | ||
| 1047 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1048 | #define AU1 uint32_t /* AU1..AU4: unsigned integer aliases using the explicitly sized uint32_t names (A_HLSL_6_2 branch). */ | ||
| 1049 | #define AU2 uint32_t2 | ||
| 1050 | #define AU3 uint32_t3 | ||
| 1051 | #define AU4 uint32_t4 | ||
| 1052 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1053 | #define ASU1 int32_t /* ASU1..ASU4: signed integer aliases using the explicitly sized int32_t names (A_HLSL_6_2 branch). */ | ||
| 1054 | #define ASU2 int32_t2 | ||
| 1055 | #define ASU3 int32_t3 | ||
| 1056 | #define ASU4 int32_t4 | ||
| 1057 | #else | ||
| 1058 | #define AP1 bool /* AP1..AP4: boolean scalar and 2/3/4-component vector aliases (branch without A_HLSL_6_2). */ | ||
| 1059 | #define AP2 bool2 | ||
| 1060 | #define AP3 bool3 | ||
| 1061 | #define AP4 bool4 | ||
| 1062 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1063 | #define AF1 float /* AF1..AF4: float scalar and 2/3/4-component vector aliases (branch without A_HLSL_6_2). */ | ||
| 1064 | #define AF2 float2 | ||
| 1065 | #define AF3 float3 | ||
| 1066 | #define AF4 float4 | ||
| 1067 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1068 | #define AU1 uint /* AU1..AU4: unsigned integer scalar and 2/3/4-component vector aliases (branch without A_HLSL_6_2). */ | ||
| 1069 | #define AU2 uint2 | ||
| 1070 | #define AU3 uint3 | ||
| 1071 | #define AU4 uint4 | ||
| 1072 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1073 | #define ASU1 int /* ASU1..ASU4: signed integer scalar and 2/3/4-component vector aliases (branch without A_HLSL_6_2). */ | ||
| 1074 | #define ASU2 int2 | ||
| 1075 | #define ASU3 int3 | ||
| 1076 | #define ASU4 int4 | ||
| 1077 | #endif | ||
| 1078 | //============================================================================================================================== | ||
| 1079 | #define AF1_AU1(x) asfloat(AU1(x)) /* AF*_AU*: convert the argument with AU*() and reinterpret the bits as float via asfloat. */ | ||
| 1080 | #define AF2_AU2(x) asfloat(AU2(x)) | ||
| 1081 | #define AF3_AU3(x) asfloat(AU3(x)) | ||
| 1082 | #define AF4_AU4(x) asfloat(AU4(x)) | ||
| 1083 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1084 | #define AU1_AF1(x) asuint(AF1(x)) /* AU*_AF*: convert the argument with AF*() and reinterpret the bits as uint via asuint. */ | ||
| 1085 | #define AU2_AF2(x) asuint(AF2(x)) | ||
| 1086 | #define AU3_AF3(x) asuint(AF3(x)) | ||
| 1087 | #define AU4_AF4(x) asuint(AF4(x)) | ||
| 1088 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1089 | AU1 AU1_AH1_AF1_x(AF1 a){AU1 h=f32tof16(a);return h;} // Float to half bits via f32tof16, returned in an AU1. | ||
| 1090 | #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a)) | ||
| 1091 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1092 | AU1 AU1_AH2_AF2_x(AF2 a){AU1 lo=f32tof16(a.x);AU1 hi=f32tof16(a.y)<<16;return lo|hi;} // Two floats to half bits: a.x in the low 16 bits, a.y in the high 16 bits. | ||
| 1093 | #define AU1_AH2_AF2(a) AU1_AH2_AF2_x(AF2(a)) | ||
| 1094 | #define AU1_AB4Unorm_AF4(x) D3DCOLORtoUBYTE4(AF4(x)) // NOTE(review): D3DCOLORtoUBYTE4 is documented as returning a swizzled, scaled int4 - confirm this yields the packed AU1 the macro name suggests. | ||
| 1095 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1096 | AF2 AF2_AH2_AU1_x(AU1 x){AF1 lo=f16tof32(x&0xFFFF);AF1 hi=f16tof32(x>>16);return AF2(lo,hi);} // Two packed halves to floats: low 16 bits become .x, high 16 bits become .y. | ||
| 1097 | #define AF2_AH2_AU1(x) AF2_AH2_AU1_x(AU1(x)) | ||
| 1098 | //============================================================================================================================== | ||
| 1099 | AF1 AF1_x(AF1 v){AF1 r=AF1(v);return r;} // AF*_x: replicate one float value into every component of the result. | ||
| 1100 | AF2 AF2_x(AF1 v){AF2 r=AF2(v,v);return r;} | ||
| 1101 | AF3 AF3_x(AF1 v){AF3 r=AF3(v,v,v);return r;} | ||
| 1102 | AF4 AF4_x(AF1 v){AF4 r=AF4(v,v,v,v);return r;} | ||
| 1103 | #define AF1_(v) AF1_x(AF1(v)) // AF*_(): convert the argument to AF1 first, then splat. | ||
| 1104 | #define AF2_(v) AF2_x(AF1(v)) | ||
| 1105 | #define AF3_(v) AF3_x(AF1(v)) | ||
| 1106 | #define AF4_(v) AF4_x(AF1(v)) | ||
| 1107 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1108 | AU1 AU1_x(AU1 v){AU1 r=AU1(v);return r;} // AU*_x: replicate one uint value into every component of the result. | ||
| 1109 | AU2 AU2_x(AU1 v){AU2 r=AU2(v,v);return r;} | ||
| 1110 | AU3 AU3_x(AU1 v){AU3 r=AU3(v,v,v);return r;} | ||
| 1111 | AU4 AU4_x(AU1 v){AU4 r=AU4(v,v,v,v);return r;} | ||
| 1112 | #define AU1_(v) AU1_x(AU1(v)) // AU*_(): convert the argument to AU1 first, then splat. | ||
| 1113 | #define AU2_(v) AU2_x(AU1(v)) | ||
| 1114 | #define AU3_(v) AU3_x(AU1(v)) | ||
| 1115 | #define AU4_(v) AU4_x(AU1(v)) | ||
| 1116 | //============================================================================================================================== | ||
| 1117 | AU1 AAbsSU1(AU1 a){ASU1 s=ASU1(a);return AU1(abs(s));} // abs() of the signed (ASU*) view, returned as unsigned. | ||
| 1118 | AU2 AAbsSU2(AU2 a){ASU2 s=ASU2(a);return AU2(abs(s));} | ||
| 1119 | AU3 AAbsSU3(AU3 a){ASU3 s=ASU3(a);return AU3(abs(s));} | ||
| 1120 | AU4 AAbsSU4(AU4 a){ASU4 s=ASU4(a);return AU4(abs(s));} | ||
| 1121 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1122 | AU1 ABfe(AU1 src,AU1 off,AU1 bits){AU1 shifted=src>>off;AU1 keep=(1u<<bits)-1;return shifted&keep;} // Bit-field extract: 'bits' bits of src starting at bit 'off'. NOTE(review): the (1u<<bits)-1 mask presumably requires bits<32 - confirm callers. | ||
| 1123 | AU1 ABfi(AU1 src,AU1 ins,AU1 mask){AU1 kept=src&(~mask);AU1 added=ins&mask;return added|kept;} // Bit-field insert: take 'ins' where mask is set, 'src' elsewhere. | ||
| 1124 | AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){AU1 keep=(1u<<bits)-1;AU1 added=ins&keep;AU1 kept=src&(~keep);return added|kept;} // As ABfi, with the mask built from the low 'bits' bits. | ||
| 1125 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1126 | AF1 AClampF1(AF1 x,AF1 n,AF1 m){AF1 capped=min(x,m);return max(n,capped);} // Clamp as min against m followed by max against n. | ||
| 1127 | AF2 AClampF2(AF2 x,AF2 n,AF2 m){AF2 capped=min(x,m);return max(n,capped);} | ||
| 1128 | AF3 AClampF3(AF3 x,AF3 n,AF3 m){AF3 capped=min(x,m);return max(n,capped);} | ||
| 1129 | AF4 AClampF4(AF4 x,AF4 n,AF4 m){AF4 capped=min(x,m);return max(n,capped);} | ||
| 1130 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1131 | AF1 AFractF1(AF1 x){AF1 whole=floor(x);return x-whole;} // Fractional part computed as x minus floor(x). | ||
| 1132 | AF2 AFractF2(AF2 x){AF2 whole=floor(x);return x-whole;} | ||
| 1133 | AF3 AFractF3(AF3 x){AF3 whole=floor(x);return x-whole;} | ||
| 1134 | AF4 AFractF4(AF4 x){AF4 whole=floor(x);return x-whole;} | ||
| 1135 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1136 | AF1 ALerpF1(AF1 x,AF1 y,AF1 a){AF1 r=lerp(x,y,a);return r;} // Thin wrapper over the lerp() intrinsic with weight a. | ||
| 1137 | AF2 ALerpF2(AF2 x,AF2 y,AF2 a){AF2 r=lerp(x,y,a);return r;} | ||
| 1138 | AF3 ALerpF3(AF3 x,AF3 y,AF3 a){AF3 r=lerp(x,y,a);return r;} | ||
| 1139 | AF4 ALerpF4(AF4 x,AF4 y,AF4 a){AF4 r=lerp(x,y,a);return r;} | ||
| 1140 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1141 | AF1 AMax3F1(AF1 x,AF1 y,AF1 z){AF1 yz=max(y,z);return max(x,yz);} // Three-input maximum built from two max() calls. | ||
| 1142 | AF2 AMax3F2(AF2 x,AF2 y,AF2 z){AF2 yz=max(y,z);return max(x,yz);} | ||
| 1143 | AF3 AMax3F3(AF3 x,AF3 y,AF3 z){AF3 yz=max(y,z);return max(x,yz);} | ||
| 1144 | AF4 AMax3F4(AF4 x,AF4 y,AF4 z){AF4 yz=max(y,z);return max(x,yz);} | ||
| 1145 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1146 | AU1 AMax3SU1(AU1 x,AU1 y,AU1 z){ASU1 yz=max(ASU1(y),ASU1(z));return AU1(max(ASU1(x),yz));} // Three-input maximum compared on the signed (ASU*) view. | ||
| 1147 | AU2 AMax3SU2(AU2 x,AU2 y,AU2 z){ASU2 yz=max(ASU2(y),ASU2(z));return AU2(max(ASU2(x),yz));} | ||
| 1148 | AU3 AMax3SU3(AU3 x,AU3 y,AU3 z){ASU3 yz=max(ASU3(y),ASU3(z));return AU3(max(ASU3(x),yz));} | ||
| 1149 | AU4 AMax3SU4(AU4 x,AU4 y,AU4 z){ASU4 yz=max(ASU4(y),ASU4(z));return AU4(max(ASU4(x),yz));} | ||
| 1150 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1151 | AU1 AMax3U1(AU1 x,AU1 y,AU1 z){AU1 yz=max(y,z);return max(x,yz);} // Three-input unsigned maximum built from two max() calls. | ||
| 1152 | AU2 AMax3U2(AU2 x,AU2 y,AU2 z){AU2 yz=max(y,z);return max(x,yz);} | ||
| 1153 | AU3 AMax3U3(AU3 x,AU3 y,AU3 z){AU3 yz=max(y,z);return max(x,yz);} | ||
| 1154 | AU4 AMax3U4(AU4 x,AU4 y,AU4 z){AU4 yz=max(y,z);return max(x,yz);} | ||
| 1155 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1156 | AU1 AMaxSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(max(sa,sb));} // Maximum compared on the signed (ASU*) view, returned as unsigned. | ||
| 1157 | AU2 AMaxSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(max(sa,sb));} | ||
| 1158 | AU3 AMaxSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(max(sa,sb));} | ||
| 1159 | AU4 AMaxSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(max(sa,sb));} | ||
| 1160 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1161 | AF1 AMed3F1(AF1 x,AF1 y,AF1 z){AF1 lo=min(x,y);AF1 hi=max(x,y);return max(lo,min(hi,z));} // Median of three: order x and y, then clamp z between them. | ||
| 1162 | AF2 AMed3F2(AF2 x,AF2 y,AF2 z){AF2 lo=min(x,y);AF2 hi=max(x,y);return max(lo,min(hi,z));} | ||
| 1163 | AF3 AMed3F3(AF3 x,AF3 y,AF3 z){AF3 lo=min(x,y);AF3 hi=max(x,y);return max(lo,min(hi,z));} | ||
| 1164 | AF4 AMed3F4(AF4 x,AF4 y,AF4 z){AF4 lo=min(x,y);AF4 hi=max(x,y);return max(lo,min(hi,z));} | ||
| 1165 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1166 | AF1 AMin3F1(AF1 x,AF1 y,AF1 z){AF1 yz=min(y,z);return min(x,yz);} // Three-input minimum built from two min() calls. | ||
| 1167 | AF2 AMin3F2(AF2 x,AF2 y,AF2 z){AF2 yz=min(y,z);return min(x,yz);} | ||
| 1168 | AF3 AMin3F3(AF3 x,AF3 y,AF3 z){AF3 yz=min(y,z);return min(x,yz);} | ||
| 1169 | AF4 AMin3F4(AF4 x,AF4 y,AF4 z){AF4 yz=min(y,z);return min(x,yz);} | ||
| 1170 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1171 | AU1 AMin3SU1(AU1 x,AU1 y,AU1 z){ASU1 yz=min(ASU1(y),ASU1(z));return AU1(min(ASU1(x),yz));} // Three-input minimum compared on the signed (ASU*) view. | ||
| 1172 | AU2 AMin3SU2(AU2 x,AU2 y,AU2 z){ASU2 yz=min(ASU2(y),ASU2(z));return AU2(min(ASU2(x),yz));} | ||
| 1173 | AU3 AMin3SU3(AU3 x,AU3 y,AU3 z){ASU3 yz=min(ASU3(y),ASU3(z));return AU3(min(ASU3(x),yz));} | ||
| 1174 | AU4 AMin3SU4(AU4 x,AU4 y,AU4 z){ASU4 yz=min(ASU4(y),ASU4(z));return AU4(min(ASU4(x),yz));} | ||
| 1175 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1176 | AU1 AMin3U1(AU1 x,AU1 y,AU1 z){AU1 yz=min(y,z);return min(x,yz);} // Three-input unsigned minimum built from two min() calls. | ||
| 1177 | AU2 AMin3U2(AU2 x,AU2 y,AU2 z){AU2 yz=min(y,z);return min(x,yz);} | ||
| 1178 | AU3 AMin3U3(AU3 x,AU3 y,AU3 z){AU3 yz=min(y,z);return min(x,yz);} | ||
| 1179 | AU4 AMin3U4(AU4 x,AU4 y,AU4 z){AU4 yz=min(y,z);return min(x,yz);} | ||
| 1180 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1181 | AU1 AMinSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(min(sa,sb));} // Minimum compared on the signed (ASU*) view, returned as unsigned. | ||
| 1182 | AU2 AMinSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(min(sa,sb));} | ||
| 1183 | AU3 AMinSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(min(sa,sb));} | ||
| 1184 | AU4 AMinSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(min(sa,sb));} | ||
| 1185 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1186 | AF1 ANCosF1(AF1 x){AF1 k=AF1_(A_2PI);return cos(k*x);} // Normalized cosine: the argument is multiplied by A_2PI before cos(). | ||
| 1187 | AF2 ANCosF2(AF2 x){AF2 k=AF2_(A_2PI);return cos(k*x);} | ||
| 1188 | AF3 ANCosF3(AF3 x){AF3 k=AF3_(A_2PI);return cos(k*x);} | ||
| 1189 | AF4 ANCosF4(AF4 x){AF4 k=AF4_(A_2PI);return cos(k*x);} | ||
| 1190 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1191 | AF1 ANSinF1(AF1 x){AF1 k=AF1_(A_2PI);return sin(k*x);} // Normalized sine: the argument is multiplied by A_2PI before sin(). | ||
| 1192 | AF2 ANSinF2(AF2 x){AF2 k=AF2_(A_2PI);return sin(k*x);} | ||
| 1193 | AF3 ANSinF3(AF3 x){AF3 k=AF3_(A_2PI);return sin(k*x);} | ||
| 1194 | AF4 ANSinF4(AF4 x){AF4 k=AF4_(A_2PI);return sin(k*x);} | ||
| 1195 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1196 | AF1 ARcpF1(AF1 x){AF1 r=rcp(x);return r;} // Per-component reciprocal via the rcp() intrinsic. | ||
| 1197 | AF2 ARcpF2(AF2 x){AF2 r=rcp(x);return r;} | ||
| 1198 | AF3 ARcpF3(AF3 x){AF3 r=rcp(x);return r;} | ||
| 1199 | AF4 ARcpF4(AF4 x){AF4 r=rcp(x);return r;} | ||
| 1200 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1201 | AF1 ARsqF1(AF1 x){AF1 r=rsqrt(x);return r;} // Per-component reciprocal square root via the rsqrt() intrinsic. | ||
| 1202 | AF2 ARsqF2(AF2 x){AF2 r=rsqrt(x);return r;} | ||
| 1203 | AF3 ARsqF3(AF3 x){AF3 r=rsqrt(x);return r;} | ||
| 1204 | AF4 ARsqF4(AF4 x){AF4 r=rsqrt(x);return r;} | ||
| 1205 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1206 | AF1 ASatF1(AF1 x){AF1 r=saturate(x);return r;} // Per-component saturate() wrapper. | ||
| 1207 | AF2 ASatF2(AF2 x){AF2 r=saturate(x);return r;} | ||
| 1208 | AF3 ASatF3(AF3 x){AF3 r=saturate(x);return r;} | ||
| 1209 | AF4 ASatF4(AF4 x){AF4 r=saturate(x);return r;} | ||
| 1210 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1211 | AU1 AShrSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(sa>>sb);} // Right shift performed on the signed (ASU*) conversion of 'a', result returned in the unsigned type. | ||
| 1212 | AU2 AShrSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(sa>>sb);} | ||
| 1213 | AU3 AShrSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(sa>>sb);} | ||
| 1214 | AU4 AShrSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(sa>>sb);} | ||
| 1215 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1216 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1217 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1218 | //============================================================================================================================== | ||
| 1219 | // HLSL BYTE | ||
| 1220 | //============================================================================================================================== | ||
| 1221 | #ifdef A_BYTE // Placeholder: this section defines no byte-specific HLSL helpers. | ||
| 1222 | #endif | ||
| 1223 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1224 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1225 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1226 | //============================================================================================================================== | ||
| 1227 | // HLSL HALF | ||
| 1228 | //============================================================================================================================== | ||
| 1229 | #ifdef A_HALF | ||
| 1230 | #ifdef A_HLSL_6_2 // Explicit 16-bit aliases: AH* = float16_t, AW* = uint16_t, ASW* = int16_t (scalar and 2-4 component vectors). | ||
| 1231 | #define AH1 float16_t | ||
| 1232 | #define AH2 float16_t2 | ||
| 1233 | #define AH3 float16_t3 | ||
| 1234 | #define AH4 float16_t4 | ||
| 1235 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1236 | #define AW1 uint16_t | ||
| 1237 | #define AW2 uint16_t2 | ||
| 1238 | #define AW3 uint16_t3 | ||
| 1239 | #define AW4 uint16_t4 | ||
| 1240 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1241 | #define ASW1 int16_t | ||
| 1242 | #define ASW2 int16_t2 | ||
| 1243 | #define ASW3 int16_t3 | ||
| 1244 | #define ASW4 int16_t4 | ||
| 1245 | #else // Without A_HLSL_6_2 the same aliases map to the min16float/min16uint/min16int types. | ||
| 1246 | #define AH1 min16float | ||
| 1247 | #define AH2 min16float2 | ||
| 1248 | #define AH3 min16float3 | ||
| 1249 | #define AH4 min16float4 | ||
| 1250 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1251 | #define AW1 min16uint | ||
| 1252 | #define AW2 min16uint2 | ||
| 1253 | #define AW3 min16uint3 | ||
| 1254 | #define AW4 min16uint4 | ||
| 1255 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1256 | #define ASW1 min16int | ||
| 1257 | #define ASW2 min16int2 | ||
| 1258 | #define ASW3 min16int3 | ||
| 1259 | #define ASW4 min16int4 | ||
| 1260 | #endif | ||
| 1261 | //============================================================================================================================== | ||
| 1262 | // Need to use manual unpack to get optimal execution (don't use packed types in buffers directly). | ||
| 1263 | // Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/ | ||
| 1264 | AH2 AH2_AU1_x(AU1 x){AF2 t=f16tof32(AU2(x&0xFFFF,x>>16));return AH2(t);} // Unpack two f16 values from one 32-bit word: low 16 bits -> .x, high 16 bits -> .y. | ||
| 1265 | AH4 AH4_AU2_x(AU2 x){return AH4(AH2_AU1_x(x.x),AH2_AU1_x(x.y));} // Four halves from two words: x.x -> .xy, x.y -> .zw. | ||
| 1266 | AW2 AW2_AU1_x(AU1 x){AU2 t=AU2(x&0xFFFF,x>>16);return AW2(t);} // Same low/high split for 16-bit unsigned integers (no float conversion). | ||
| 1267 | AW4 AW4_AU2_x(AU2 x){return AW4(AW2_AU1_x(x.x),AW2_AU1_x(x.y));} // The macros below cast their argument to AU1/AU2 before calling these _x helpers. | ||
| 1268 | #define AH2_AU1(x) AH2_AU1_x(AU1(x)) | ||
| 1269 | #define AH4_AU2(x) AH4_AU2_x(AU2(x)) | ||
| 1270 | #define AW2_AU1(x) AW2_AU1_x(AU1(x)) | ||
| 1271 | #define AW4_AU2(x) AW4_AU2_x(AU2(x)) | ||
| 1272 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1273 | AU1 AU1_AH2_x(AH2 x){return f32tof16(x.x)+(f32tof16(x.y)<<16);} // Pack two halves into one 32-bit word: .x in the low 16 bits, .y in the high 16 bits. | ||
| 1274 | AU2 AU2_AH4_x(AH4 x){return AU2(AU1_AH2_x(x.xy),AU1_AH2_x(x.zw));} // .xy -> first word, .zw -> second word. | ||
| 1275 | AU1 AU1_AW2_x(AW2 x){return AU1(x.x)+(AU1(x.y)<<16);} // Integer variant: widen each 16-bit word to AU1, then combine. | ||
| 1276 | AU2 AU2_AW4_x(AW4 x){return AU2(AU1_AW2_x(x.xy),AU1_AW2_x(x.zw));} // The macros below cast their argument to the 16-bit vector type before calling these _x helpers. | ||
| 1277 | #define AU1_AH2(x) AU1_AH2_x(AH2(x)) | ||
| 1278 | #define AU2_AH4(x) AU2_AH4_x(AH4(x)) | ||
| 1279 | #define AU1_AW2(x) AU1_AW2_x(AW2(x)) | ||
| 1280 | #define AU2_AW4(x) AU2_AW4_x(AW4(x)) | ||
| 1281 | //============================================================================================================================== | ||
| 1282 | #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST) // Half -> 16-bit word: use asuint16() unless A_NO_16_BIT_CAST opts out. | ||
| 1283 | #define AW1_AH1(x) asuint16(x) | ||
| 1284 | #define AW2_AH2(x) asuint16(x) | ||
| 1285 | #define AW3_AH3(x) asuint16(x) | ||
| 1286 | #define AW4_AH4(x) asuint16(x) | ||
| 1287 | #else // Fallback: per component, widen to AF1 and take the f32tof16() encoding. | ||
| 1288 | #define AW1_AH1(a) AW1(f32tof16(AF1(a))) | ||
| 1289 | #define AW2_AH2(a) AW2(AW1_AH1((a).x),AW1_AH1((a).y)) | ||
| 1290 | #define AW3_AH3(a) AW3(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z)) | ||
| 1291 | #define AW4_AH4(a) AW4(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z),AW1_AH1((a).w)) | ||
| 1292 | #endif | ||
| 1293 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1294 | #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST) // 16-bit word -> half: use asfloat16() unless A_NO_16_BIT_CAST opts out. | ||
| 1295 | #define AH1_AW1(x) asfloat16(x) | ||
| 1296 | #define AH2_AW2(x) asfloat16(x) | ||
| 1297 | #define AH3_AW3(x) asfloat16(x) | ||
| 1298 | #define AH4_AW4(x) asfloat16(x) | ||
| 1299 | #else // Fallback: per component, widen to AU1 and decode with f16tof32(). | ||
| 1300 | #define AH1_AW1(a) AH1(f16tof32(AU1(a))) | ||
| 1301 | #define AH2_AW2(a) AH2(AH1_AW1((a).x),AH1_AW1((a).y)) | ||
| 1302 | #define AH3_AW3(a) AH3(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z)) | ||
| 1303 | #define AH4_AW4(a) AH4(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z),AH1_AW1((a).w)) | ||
| 1304 | #endif | ||
| 1305 | //============================================================================================================================== | ||
| 1306 | AH1 AH1_x(AH1 a){return AH1(a);} // Half broadcast helpers: replicate one AH1 scalar into every component of the result. | ||
| 1307 | AH2 AH2_x(AH1 a){return AH2(a,a);} | ||
| 1308 | AH3 AH3_x(AH1 a){return AH3(a,a,a);} | ||
| 1309 | AH4 AH4_x(AH1 a){return AH4(a,a,a,a);} // The AH*_() macros below cast their argument to AH1 before broadcasting. | ||
| 1310 | #define AH1_(a) AH1_x(AH1(a)) | ||
| 1311 | #define AH2_(a) AH2_x(AH1(a)) | ||
| 1312 | #define AH3_(a) AH3_x(AH1(a)) | ||
| 1313 | #define AH4_(a) AH4_x(AH1(a)) | ||
| 1314 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1315 | AW1 AW1_x(AW1 a){return AW1(a);} // 16-bit word broadcast helpers: replicate one AW1 scalar into every component of the result. | ||
| 1316 | AW2 AW2_x(AW1 a){return AW2(a,a);} | ||
| 1317 | AW3 AW3_x(AW1 a){return AW3(a,a,a);} | ||
| 1318 | AW4 AW4_x(AW1 a){return AW4(a,a,a,a);} // The AW*_() macros below cast their argument to AW1 before broadcasting. | ||
| 1319 | #define AW1_(a) AW1_x(AW1(a)) | ||
| 1320 | #define AW2_(a) AW2_x(AW1(a)) | ||
| 1321 | #define AW3_(a) AW3_x(AW1(a)) | ||
| 1322 | #define AW4_(a) AW4_x(AW1(a)) | ||
| 1323 | //============================================================================================================================== | ||
| 1324 | AW1 AAbsSW1(AW1 a){ASW1 s=ASW1(a);return AW1(abs(s));} // Absolute value taken on the signed 16-bit (ASW*) conversion, returned in the unsigned word type. | ||
| 1325 | AW2 AAbsSW2(AW2 a){ASW2 s=ASW2(a);return AW2(abs(s));} | ||
| 1326 | AW3 AAbsSW3(AW3 a){ASW3 s=ASW3(a);return AW3(abs(s));} | ||
| 1327 | AW4 AAbsSW4(AW4 a){ASW4 s=ASW4(a);return AW4(abs(s));} | ||
| 1328 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1329 | AH1 AClampH1(AH1 x,AH1 n,AH1 m){AH1 capped=min(x,m);return max(n,capped);} // Clamp x to [n,m]: apply the upper bound m first, then the lower bound n. | ||
| 1330 | AH2 AClampH2(AH2 x,AH2 n,AH2 m){AH2 capped=min(x,m);return max(n,capped);} | ||
| 1331 | AH3 AClampH3(AH3 x,AH3 n,AH3 m){AH3 capped=min(x,m);return max(n,capped);} | ||
| 1332 | AH4 AClampH4(AH4 x,AH4 n,AH4 m){AH4 capped=min(x,m);return max(n,capped);} | ||
| 1333 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1334 | // V_FRACT_F16 (note DX frac() is different). | ||
| 1335 | AH1 AFractH1(AH1 x){AH1 whole=floor(x);return x-whole;} // Fractional part computed as x minus floor(x). | ||
| 1336 | AH2 AFractH2(AH2 x){AH2 whole=floor(x);return x-whole;} | ||
| 1337 | AH3 AFractH3(AH3 x){AH3 whole=floor(x);return x-whole;} | ||
| 1338 | AH4 AFractH4(AH4 x){AH4 whole=floor(x);return x-whole;} | ||
| 1339 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1340 | AH1 ALerpH1(AH1 x,AH1 y,AH1 a){AH1 r=lerp(x,y,a);return r;} // Half-precision lerp() wrapper: blends x toward y by weight a. | ||
| 1341 | AH2 ALerpH2(AH2 x,AH2 y,AH2 a){AH2 r=lerp(x,y,a);return r;} | ||
| 1342 | AH3 ALerpH3(AH3 x,AH3 y,AH3 a){AH3 r=lerp(x,y,a);return r;} | ||
| 1343 | AH4 ALerpH4(AH4 x,AH4 y,AH4 a){AH4 r=lerp(x,y,a);return r;} | ||
| 1344 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1345 | AH1 AMax3H1(AH1 x,AH1 y,AH1 z){AH1 yz=max(y,z);return max(x,yz);} // Half-precision three-input maximum: max(x,max(y,z)). | ||
| 1346 | AH2 AMax3H2(AH2 x,AH2 y,AH2 z){AH2 yz=max(y,z);return max(x,yz);} | ||
| 1347 | AH3 AMax3H3(AH3 x,AH3 y,AH3 z){AH3 yz=max(y,z);return max(x,yz);} | ||
| 1348 | AH4 AMax3H4(AH4 x,AH4 y,AH4 z){AH4 yz=max(y,z);return max(x,yz);} | ||
| 1349 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1350 | AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASW1(a),ASW1(b)));} // Signed 16-bit maximum: operands are compared as ASW* (the 32-bit ASU* conversion widened them as non-negative values, losing the 16-bit sign). | ||
| 1351 | AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASW2(a),ASW2(b)));} | ||
| 1352 | AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASW3(a),ASW3(b)));} | ||
| 1353 | AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASW4(a),ASW4(b)));} | ||
| 1354 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1355 | AH1 AMin3H1(AH1 x,AH1 y,AH1 z){AH1 yz=min(y,z);return min(x,yz);} // Half-precision three-input minimum: min(x,min(y,z)). | ||
| 1356 | AH2 AMin3H2(AH2 x,AH2 y,AH2 z){AH2 yz=min(y,z);return min(x,yz);} | ||
| 1357 | AH3 AMin3H3(AH3 x,AH3 y,AH3 z){AH3 yz=min(y,z);return min(x,yz);} | ||
| 1358 | AH4 AMin3H4(AH4 x,AH4 y,AH4 z){AH4 yz=min(y,z);return min(x,yz);} | ||
| 1359 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1360 | AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASW1(a),ASW1(b)));} // Signed 16-bit minimum: operands are compared as ASW* (the 32-bit ASU* conversion widened them as non-negative values, losing the 16-bit sign). | ||
| 1361 | AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASW2(a),ASW2(b)));} | ||
| 1362 | AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASW3(a),ASW3(b)));} | ||
| 1363 | AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASW4(a),ASW4(b)));} | ||
| 1364 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1365 | AH1 ARcpH1(AH1 x){AH1 r=rcp(x);return r;} // Half-precision per-component reciprocal via rcp(). | ||
| 1366 | AH2 ARcpH2(AH2 x){AH2 r=rcp(x);return r;} | ||
| 1367 | AH3 ARcpH3(AH3 x){AH3 r=rcp(x);return r;} | ||
| 1368 | AH4 ARcpH4(AH4 x){AH4 r=rcp(x);return r;} | ||
| 1369 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1370 | AH1 ARsqH1(AH1 x){AH1 r=rsqrt(x);return r;} // Half-precision per-component reciprocal square root via rsqrt(). | ||
| 1371 | AH2 ARsqH2(AH2 x){AH2 r=rsqrt(x);return r;} | ||
| 1372 | AH3 ARsqH3(AH3 x){AH3 r=rsqrt(x);return r;} | ||
| 1373 | AH4 ARsqH4(AH4 x){AH4 r=rsqrt(x);return r;} | ||
| 1374 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1375 | AH1 ASatH1(AH1 x){AH1 r=saturate(x);return r;} // Half-precision per-component saturate() wrapper. | ||
| 1376 | AH2 ASatH2(AH2 x){AH2 r=saturate(x);return r;} | ||
| 1377 | AH3 ASatH3(AH3 x){AH3 r=saturate(x);return r;} | ||
| 1378 | AH4 ASatH4(AH4 x){AH4 r=saturate(x);return r;} | ||
| 1379 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1380 | AW1 AShrSW1(AW1 a,AW1 b){ASW1 sa=ASW1(a);ASW1 sb=ASW1(b);return AW1(sa>>sb);} // Right shift performed on the signed 16-bit (ASW*) conversion of 'a', returned in the unsigned word type. | ||
| 1381 | AW2 AShrSW2(AW2 a,AW2 b){ASW2 sa=ASW2(a);ASW2 sb=ASW2(b);return AW2(sa>>sb);} | ||
| 1382 | AW3 AShrSW3(AW3 a,AW3 b){ASW3 sa=ASW3(a);ASW3 sb=ASW3(b);return AW3(sa>>sb);} | ||
| 1383 | AW4 AShrSW4(AW4 a,AW4 b){ASW4 sa=ASW4(a);ASW4 sb=ASW4(b);return AW4(sa>>sb);} | ||
| 1384 | #endif | ||
| 1385 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1386 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1387 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1388 | //============================================================================================================================== | ||
| 1389 | // HLSL DOUBLE | ||
| 1390 | //============================================================================================================================== | ||
| 1391 | #ifdef A_DUBL | ||
| 1392 | #ifdef A_HLSL_6_2 // Double-precision aliases AD1..AD4: float64_t types on this path. | ||
| 1393 | #define AD1 float64_t | ||
| 1394 | #define AD2 float64_t2 | ||
| 1395 | #define AD3 float64_t3 | ||
| 1396 | #define AD4 float64_t4 | ||
| 1397 | #else // Without A_HLSL_6_2 the aliases map to the double types. | ||
| 1398 | #define AD1 double | ||
| 1399 | #define AD2 double2 | ||
| 1400 | #define AD3 double3 | ||
| 1401 | #define AD4 double4 | ||
| 1402 | #endif | ||
| 1403 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1404 | AD1 AD1_x(AD1 a){return AD1(a);} // Double broadcast helpers: replicate one AD1 scalar into every component of the result. | ||
| 1405 | AD2 AD2_x(AD1 a){return AD2(a,a);} | ||
| 1406 | AD3 AD3_x(AD1 a){return AD3(a,a,a);} | ||
| 1407 | AD4 AD4_x(AD1 a){return AD4(a,a,a,a);} // The AD*_() macros below cast their argument to AD1 before broadcasting. | ||
| 1408 | #define AD1_(a) AD1_x(AD1(a)) | ||
| 1409 | #define AD2_(a) AD2_x(AD1(a)) | ||
| 1410 | #define AD3_(a) AD3_x(AD1(a)) | ||
| 1411 | #define AD4_(a) AD4_x(AD1(a)) | ||
| 1412 | //============================================================================================================================== | ||
| 1413 | AD1 AFractD1(AD1 a){AD1 whole=floor(a);return a-whole;} // Double-precision fractional part computed as a minus floor(a). | ||
| 1414 | AD2 AFractD2(AD2 a){AD2 whole=floor(a);return a-whole;} | ||
| 1415 | AD3 AFractD3(AD3 a){AD3 whole=floor(a);return a-whole;} | ||
| 1416 | AD4 AFractD4(AD4 a){AD4 whole=floor(a);return a-whole;} | ||
| 1417 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1418 | AD1 ALerpD1(AD1 x,AD1 y,AD1 a){AD1 r=lerp(x,y,a);return r;} // Double-precision lerp() wrapper: blends x toward y by weight a. | ||
| 1419 | AD2 ALerpD2(AD2 x,AD2 y,AD2 a){AD2 r=lerp(x,y,a);return r;} | ||
| 1420 | AD3 ALerpD3(AD3 x,AD3 y,AD3 a){AD3 r=lerp(x,y,a);return r;} | ||
| 1421 | AD4 ALerpD4(AD4 x,AD4 y,AD4 a){AD4 r=lerp(x,y,a);return r;} | ||
| 1422 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1423 | AD1 ARcpD1(AD1 x){AD1 r=rcp(x);return r;} // Double-precision per-component reciprocal via rcp(). | ||
| 1424 | AD2 ARcpD2(AD2 x){AD2 r=rcp(x);return r;} | ||
| 1425 | AD3 ARcpD3(AD3 x){AD3 r=rcp(x);return r;} | ||
| 1426 | AD4 ARcpD4(AD4 x){AD4 r=rcp(x);return r;} | ||
| 1427 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1428 | AD1 ARsqD1(AD1 x){AD1 r=rsqrt(x);return r;} // Double-precision per-component reciprocal square root via rsqrt(). | ||
| 1429 | AD2 ARsqD2(AD2 x){AD2 r=rsqrt(x);return r;} | ||
| 1430 | AD3 ARsqD3(AD3 x){AD3 r=rsqrt(x);return r;} | ||
| 1431 | AD4 ARsqD4(AD4 x){AD4 r=rsqrt(x);return r;} | ||
| 1432 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1433 | AD1 ASatD1(AD1 x){AD1 r=saturate(x);return r;} // Double-precision per-component saturate() wrapper. | ||
| 1434 | AD2 ASatD2(AD2 x){AD2 r=saturate(x);return r;} | ||
| 1435 | AD3 ASatD3(AD3 x){AD3 r=saturate(x);return r;} | ||
| 1436 | AD4 ASatD4(AD4 x){AD4 r=saturate(x);return r;} | ||
| 1437 | #endif | ||
| 1438 | //============================================================================================================================== | ||
| 1439 | // HLSL WAVE | ||
| 1440 | //============================================================================================================================== | ||
| 1441 | #ifdef A_WAVE | ||
| 1442 | // Where 'x' must be a compile time literal. | ||
| 1443 | AF1 AWaveXorF1(AF1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} // Returns 'v' as held by the lane whose index is this lane's index XOR 'x'. | ||
| 1444 | AF2 AWaveXorF2(AF2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} | ||
| 1445 | AF3 AWaveXorF3(AF3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} | ||
| 1446 | AF4 AWaveXorF4(AF4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} | ||
| 1447 | AU1 AWaveXorU1(AU1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} | ||
| 1448 | AU2 AWaveXorU2(AU2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} AU2 AWaveXorU1(AU2 v,AU1 x){return AWaveXorU2(v,x);} // Name now matches the AU2 width; the misnamed AWaveXorU1 overload is kept for existing callers. | ||
| 1449 | AU3 AWaveXorU3(AU3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} AU3 AWaveXorU1(AU3 v,AU1 x){return AWaveXorU3(v,x);} // Same: correctly named function plus legacy overload. | ||
| 1450 | AU4 AWaveXorU4(AU4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} AU4 AWaveXorU1(AU4 v,AU1 x){return AWaveXorU4(v,x);} // Same: correctly named function plus legacy overload. | ||
| 1451 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1452 | #ifdef A_HALF | ||
| 1453 | AH2 AWaveXorH2(AH2 v,AU1 x){return AH2_AU1(WaveReadLaneAt(AU1_AH2(v),WaveGetLaneIndex()^x));} // 16-bit variants: pack into 32-bit words, read from lane (index XOR x), then unpack. | ||
| 1454 | AH4 AWaveXorH4(AH4 v,AU1 x){return AH4_AU2(WaveReadLaneAt(AU2_AH4(v),WaveGetLaneIndex()^x));} | ||
| 1455 | AW2 AWaveXorW2(AW2 v,AU1 x){return AW2_AU1(WaveReadLaneAt(AU1_AW2(v),WaveGetLaneIndex()^x));} | ||
| 1456 | AW4 AWaveXorW4(AW4 v,AU1 x){return AW4_AU2(WaveReadLaneAt(AU2_AW4(v),WaveGetLaneIndex()^x));} // Four 16-bit words need two 32-bit words: uses AU2_AW4/AW4_AU2 (AU1_AW4/AW4_AU1 are not defined). | ||
| 1457 | #endif | ||
| 1458 | #endif | ||
| 1459 | //============================================================================================================================== | ||
| 1460 | #endif | ||
| 1461 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1462 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1463 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1464 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1465 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1466 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1467 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1468 | //============================================================================================================================== | ||
| 1469 | // | ||
| 1470 | // | ||
| 1471 | // GPU COMMON | ||
| 1472 | // | ||
| 1473 | // | ||
| 1474 | //============================================================================================================================== | ||
| 1475 | #ifdef A_GPU | ||
| 1476 | // Positive (A_INFP_F) and negative (A_INFN_F) infinity, built from the binary32 bit patterns 0x7f800000 and 0xff800000. | ||
| 1477 | #define A_INFP_F AF1_AU1(0x7f800000u) | ||
| 1478 | #define A_INFN_F AF1_AU1(0xff800000u) | ||
| 1479 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1480 | // Copy sign from 's' to positive 'd'. | ||
| 1481 | AF1 ACpySgnF1(AF1 d,AF1 s){return AF1_AU1(AU1_AF1(d)|(AU1_AF1(s)&AU1_(0x80000000u)));} // ORs bit 31 of 's' into the bits of 'd'; the OR cannot clear a sign bit already set in 'd', hence the "positive 'd'" requirement. Presumably AU1_AF1/AF1_AU1 reinterpret bits (defined outside this section). | ||
| 1482 | AF2 ACpySgnF2(AF2 d,AF2 s){return AF2_AU2(AU2_AF2(d)|(AU2_AF2(s)&AU2_(0x80000000u)));} | ||
| 1483 | AF3 ACpySgnF3(AF3 d,AF3 s){return AF3_AU3(AU3_AF3(d)|(AU3_AF3(s)&AU3_(0x80000000u)));} | ||
| 1484 | AF4 ACpySgnF4(AF4 d,AF4 s){return AF4_AU4(AU4_AF4(d)|(AU4_AF4(s)&AU4_(0x80000000u)));} | ||
| 1485 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1486 | // Single operation to return (useful to create a mask to use in lerp for branch free logic), | ||
| 1487 | // m=NaN := 0 | ||
| 1488 | // m>=0 := 0 | ||
| 1489 | // m<0 := 1 | ||
| 1490 | // Uses the following useful floating point logic, | ||
| 1491 | // saturate(+a*(-INF)==-INF) := 0 | ||
| 1492 | // saturate( 0*(-INF)== NaN) := 0 | ||
| 1493 | // saturate(-a*(-INF)==+INF) := 1 | ||
| 1494 | AF1 ASignedF1(AF1 m){return ASatF1(m*AF1_(A_INFN_F));} // Multiply by -INF, then saturate: the result is the 0/1 mask described in the table above. | ||
| 1495 | AF2 ASignedF2(AF2 m){return ASatF2(m*AF2_(A_INFN_F));} | ||
| 1496 | AF3 ASignedF3(AF3 m){return ASatF3(m*AF3_(A_INFN_F));} | ||
| 1497 | AF4 ASignedF4(AF4 m){return ASatF4(m*AF4_(A_INFN_F));} | ||
| 1498 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1499 | AF1 AGtZeroF1(AF1 m){return ASatF1(m*AF1_(A_INFP_F));} // Counterpart of ASignedF* using +INF: m>0 gives +INF -> 1, m<0 gives -INF -> 0, and m==0 gives NaN, which saturates to 0 per the notes above. | ||
| 1500 | AF2 AGtZeroF2(AF2 m){return ASatF2(m*AF2_(A_INFP_F));} | ||
| 1501 | AF3 AGtZeroF3(AF3 m){return ASatF3(m*AF3_(A_INFP_F));} | ||
| 1502 | AF4 AGtZeroF4(AF4 m){return ASatF4(m*AF4_(A_INFP_F));} | ||
| 1503 | //============================================================================================================================== | ||
| 1504 | #ifdef A_HALF | ||
| 1505 | #ifdef A_HLSL_6_2 // Half-precision +INF (0x7c00) and -INF (0xfc00) bit patterns; on this path the literal is cast to uint16_t explicitly. | ||
| 1506 | #define A_INFP_H AH1_AW1((uint16_t)0x7c00u) | ||
| 1507 | #define A_INFN_H AH1_AW1((uint16_t)0xfc00u) | ||
| 1508 | #else // Same bit patterns, passed to AH1_AW1 without the explicit cast. | ||
| 1509 | #define A_INFP_H AH1_AW1(0x7c00u) | ||
| 1510 | #define A_INFN_H AH1_AW1(0xfc00u) | ||
| 1511 | #endif | ||
| 1512 | |||
| 1513 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1514 | AH1 ACpySgnH1(AH1 d,AH1 s){return AH1_AW1(AW1_AH1(d)|(AW1_AH1(s)&AW1_(0x8000u)));} // Half version of ACpySgnF*: ORs bit 15 of 's' into the 16-bit pattern of 'd' (cannot clear a sign bit already set in 'd'). | ||
| 1515 | AH2 ACpySgnH2(AH2 d,AH2 s){return AH2_AW2(AW2_AH2(d)|(AW2_AH2(s)&AW2_(0x8000u)));} | ||
| 1516 | AH3 ACpySgnH3(AH3 d,AH3 s){return AH3_AW3(AW3_AH3(d)|(AW3_AH3(s)&AW3_(0x8000u)));} | ||
| 1517 | AH4 ACpySgnH4(AH4 d,AH4 s){return AH4_AW4(AW4_AH4(d)|(AW4_AH4(s)&AW4_(0x8000u)));} | ||
| 1518 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1519 | AH1 ASignedH1(AH1 m){return ASatH1(m*AH1_(A_INFN_H));} // Half version of ASignedF*: multiply by -INF, then saturate. | ||
| 1520 | AH2 ASignedH2(AH2 m){return ASatH2(m*AH2_(A_INFN_H));} | ||
| 1521 | AH3 ASignedH3(AH3 m){return ASatH3(m*AH3_(A_INFN_H));} | ||
| 1522 | AH4 ASignedH4(AH4 m){return ASatH4(m*AH4_(A_INFN_H));} | ||
| 1523 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1524 | AH1 AGtZeroH1(AH1 m){return ASatH1(m*AH1_(A_INFP_H));} // Half version of AGtZeroF*: multiply by +INF, then saturate. | ||
| 1525 | AH2 AGtZeroH2(AH2 m){return ASatH2(m*AH2_(A_INFP_H));} | ||
| 1526 | AH3 AGtZeroH3(AH3 m){return ASatH3(m*AH3_(A_INFP_H));} | ||
| 1527 | AH4 AGtZeroH4(AH4 m){return ASatH4(m*AH4_(A_INFP_H));} | ||
| 1528 | #endif | ||
| 1529 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1530 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1531 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1532 | //============================================================================================================================== | ||
| 1533 | // [FIS] FLOAT INTEGER SORTABLE | ||
| 1534 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1535 | // Float to integer sortable. | ||
| 1536 | // - If sign bit=0, flip the sign bit (positives). | ||
| 1537 | // - If sign bit=1, flip all bits (negatives). | ||
| 1538 | // Integer sortable to float. | ||
| 1539 | // - If sign bit=1, flip the sign bit (positives). | ||
| 1540 | // - If sign bit=0, flip all bits (negatives). | ||
| 1541 | // Has nice side effects. | ||
| 1542 | // - Larger integers are more positive values. | ||
| 1543 | // - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). | ||
| 1544 | // Burns 3 ops for conversion {shift,or,xor}. | ||
| 1545 | //============================================================================================================================== | ||
| 1546 | AU1 AFisToU1(AU1 x){return x^(( AShrSU1(x,AU1_(31)))|AU1_(0x80000000));} // Float bits -> sortable integer: the signed shift by 31 yields all ones when the sign bit is set (flip everything), else the mask is just 0x80000000 (flip the sign bit). | ||
| 1547 | AU1 AFisFromU1(AU1 x){return x^((~AShrSU1(x,AU1_(31)))|AU1_(0x80000000));} // Inverse: complementing the shifted value swaps which case flips all bits. | ||
| 1548 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1549 | // Just adjust high 16-bit value (useful when upper part of 32-bit word is a 16-bit float value). | ||
| 1550 | AU1 AFisToHiU1(AU1 x){return x^(( AShrSU1(x,AU1_(15)))|AU1_(0x80000000));} // Same scheme with a shift of 15. NOTE(review): the shifted value also carries x[30:15] into the low 16 bits of the XOR mask, so the low half is modified too - confirm callers treat the low 16 bits as don't-care. | ||
| 1551 | AU1 AFisFromHiU1(AU1 x){return x^((~AShrSU1(x,AU1_(15)))|AU1_(0x80000000));} // Inverse for the high 16 bits. NOTE(review): the low 16 bits do not appear to round-trip through To/From - verify before relying on them. | ||
| 1552 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1553 | #ifdef A_HALF | ||
| 1554 | AW1 AFisToW1(AW1 x){return x^(( AShrSW1(x,AW1_(15)))|AW1_(0x8000));} // 16-bit version of AFisToU1: signed shift by 15 and sign mask 0x8000. | ||
| 1555 | AW1 AFisFromW1(AW1 x){return x^((~AShrSW1(x,AW1_(15)))|AW1_(0x8000));} // 16-bit version of AFisFromU1. | ||
| 1556 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1557 | AW2 AFisToW2(AW2 x){return x^(( AShrSW2(x,AW2_(15)))|AW2_(0x8000));} // Two-component version of AFisToW1, applied per component. | ||
| 1558 | AW2 AFisFromW2(AW2 x){return x^((~AShrSW2(x,AW2_(15)))|AW2_(0x8000));} // Two-component version of AFisFromW1. | ||
| 1559 | #endif | ||
| 1560 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1561 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1562 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1563 | //============================================================================================================================== | ||
| 1564 | // [PERM] V_PERM_B32 | ||
| 1565 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1566 | // Support for V_PERM_B32 started in the 3rd generation of GCN. | ||
| 1567 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1568 | // yyyyxxxx - The 'i' input. | ||
| 1569 | // 76543210 | ||
| 1570 | // ======== | ||
| 1571 | // HGFEDCBA - Naming on permutation. | ||
| 1572 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1573 | // TODO | ||
| 1574 | // ==== | ||
| 1575 | // - Make sure compiler optimizes this. | ||
| 1576 | //============================================================================================================================== | ||
| 1577 | #ifdef A_HALF | ||
| 1578 | AU1 APerm0E0A(AU2 i){return((i.x )&0xffu)|((i.y<<16)&0xff0000u);} // Suffix lists result bytes from high to low: A-D are bytes 0-3 of i.x, E-H are bytes 0-3 of i.y, '0' is a zero byte. | ||
| 1579 | AU1 APerm0F0B(AU2 i){return((i.x>> 8)&0xffu)|((i.y<< 8)&0xff0000u);} // Byte 1 of each input. | ||
| 1580 | AU1 APerm0G0C(AU2 i){return((i.x>>16)&0xffu)|((i.y )&0xff0000u);} // Byte 2 of each input. | ||
| 1581 | AU1 APerm0H0D(AU2 i){return((i.x>>24)&0xffu)|((i.y>> 8)&0xff0000u);} // Byte 3 of each input. | ||
| 1582 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1583 | AU1 APermHGFA(AU2 i){return((i.x )&0x000000ffu)|(i.y&0xffffff00u);} // This group keeps three bytes of i.y and replaces the remaining byte with byte A or C of i.x; here byte 0 <- A. | ||
| 1584 | AU1 APermHGFC(AU2 i){return((i.x>>16)&0x000000ffu)|(i.y&0xffffff00u);} // Byte 0 <- C. | ||
| 1585 | AU1 APermHGAE(AU2 i){return((i.x<< 8)&0x0000ff00u)|(i.y&0xffff00ffu);} // Byte 1 <- A. | ||
| 1586 | AU1 APermHGCE(AU2 i){return((i.x>> 8)&0x0000ff00u)|(i.y&0xffff00ffu);} // Byte 1 <- C. | ||
| 1587 | AU1 APermHAFE(AU2 i){return((i.x<<16)&0x00ff0000u)|(i.y&0xff00ffffu);} // Byte 2 <- A. | ||
| 1588 | AU1 APermHCFE(AU2 i){return((i.x )&0x00ff0000u)|(i.y&0xff00ffffu);} // Byte 2 <- C. | ||
| 1589 | AU1 APermAGFE(AU2 i){return((i.x<<24)&0xff000000u)|(i.y&0x00ffffffu);} // Byte 3 <- A. | ||
| 1590 | AU1 APermCGFE(AU2 i){return((i.x<< 8)&0xff000000u)|(i.y&0x00ffffffu);} // Byte 3 <- C. | ||
| 1591 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1592 | AU1 APermGCEA(AU2 i){return((i.x)&0x00ff00ffu)|((i.y<<8)&0xff00ff00u);} // Interleave: bytes A and C of i.x stay in bytes 0 and 2; bytes E and G of i.y move up to bytes 1 and 3. | ||
| 1593 | AU1 APermGECA(AU2 i){return(((i.x)&0xffu)|((i.x>>8)&0xff00u)|((i.y<<16)&0xff0000u)|((i.y<<8)&0xff000000u));} // Gather: A and C into the low two bytes, E and G into the high two bytes. | ||
| 1594 | #endif | ||
| 1595 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1596 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1597 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1598 | //============================================================================================================================== | ||
| 1599 | // [BUC] BYTE UNSIGNED CONVERSION | ||
| 1600 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1601 | // Designed to use the optimal conversion, enables the scaling to possibly be factored into other computation. | ||
| 1602 | // Works on a range of {0 to A_BUC_<32,16>}, for <32-bit, and 16-bit> respectively. | ||
| 1603 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1604 | // OPCODE NOTES | ||
| 1605 | // ============ | ||
| 1606 | // GCN does not do UNORM or SNORM for bytes in opcodes. | ||
| 1607 | // - V_CVT_F32_UBYTE{0,1,2,3} - Unsigned byte to float. | ||
| 1608 | // - V_CVT_PKACCUM_U8_F32 - Float to unsigned byte (does bit-field insert into 32-bit integer). | ||
| 1609 | // V_PERM_B32 does byte packing with ability to zero fill bytes as well. | ||
| 1610 | // - Can pull out byte values from two sources, and zero fill upper 8-bits of packed hi and lo. | ||
| 1611 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1612 | // BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U1() - Designed for V_CVT_F32_UBYTE* and V_CVT_PKACCUM_U8_F32 ops. | ||
| 1613 | // ==== ===== | ||
| 1614 | // 0 : 0 | ||
| 1615 | // 1 : 1 | ||
| 1616 | // ... | ||
| 1617 | // 255 : 255 | ||
| 1618 | // : 256 (just outside the encoding range) | ||
| 1619 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1620 | // BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32. | ||
| 1621 | // ==== ===== | ||
| 1622 | // 0 : 0 | ||
| 1623 | // 1 : 1/512 | ||
| 1624 | // 2 : 1/256 | ||
| 1625 | // ... | ||
| 1626 | // 64 : 1/8 | ||
| 1627 | // 128 : 1/4 | ||
| 1628 | // 255 : 255/512 | ||
| 1629 | // : 1/2 (just outside the encoding range) | ||
| 1630 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1631 | // OPTIMAL IMPLEMENTATIONS ON AMD ARCHITECTURES | ||
| 1632 | // ============================================ | ||
| 1633 | // r=ABuc0FromU1(i) | ||
| 1634 | // V_CVT_F32_UBYTE0 r,i | ||
| 1635 | // -------------------------------------------- | ||
| 1636 | // r=ABuc0ToU1(d,i) | ||
| 1637 | // V_CVT_PKACCUM_U8_F32 r,i,0,d | ||
| 1638 | // -------------------------------------------- | ||
| 1639 | // d=ABuc0FromU2(i) | ||
| 1640 | // Where 'k0' is an SGPR with 0x0E0A | ||
| 1641 | // Where 'k1' is an SGPR with {32768.0} packed into the lower 16-bits | ||
| 1642 | // V_PERM_B32 d,i.x,i.y,k0 | ||
| 1643 | // V_PK_FMA_F16 d,d,k1.x,0 | ||
| 1644 | // -------------------------------------------- | ||
| 1645 | // r=ABuc0ToU2(d,i) | ||
| 1646 | // Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits | ||
| 1647 | // Where 'k1' is an SGPR with 0x???? | ||
| 1648 | // Where 'k2' is an SGPR with 0x???? | ||
| 1649 | // V_PK_FMA_F16 i,i,k0.x,0 | ||
| 1650 | // V_PERM_B32 r.x,i,i,k1 | ||
| 1651 | // V_PERM_B32 r.y,i,i,k2 | ||
| 1652 | //============================================================================================================================== | ||
| 1653 | // Largest decoded value of the 32-bit (U1) and 16-bit (U2) byte-unsigned conversions, i.e. what byte 255 decodes to. | ||
| 1654 | #define A_BUC_32 (255.0) | ||
| 1655 | #define A_BUC_16 (255.0/512.0) | ||
| 1656 | //============================================================================================================================== | ||
| 1657 | #if 1 | ||
| 1658 | // Each of these is intended to compile down to a single V_CVT_PKACCUM_U8_F32. | ||
| 1659 | // Keep the min(): it is required for the expression to pattern match to V_CVT_PKACCUM_U8_F32. | ||
| 1660 | AU1 ABuc0ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0xffffff00u)|((b )&(0x000000ffu));} | ||
| 1661 | AU1 ABuc1ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0xffff00ffu)|((b<< 8)&(0x0000ff00u));} | ||
| 1662 | AU1 ABuc2ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0xff00ffffu)|((b<<16)&(0x00ff0000u));} | ||
| 1663 | AU1 ABuc3ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0x00ffffffu)|((b<<24)&(0xff000000u));} | ||
| 1664 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1665 | // Each of these is intended to be a single V_CVT_F32_UBYTE*: extract one byte of 'i' and convert it to float. | ||
| 1666 | AF1 ABuc0FromU1(AU1 i){AU1 b=(i )&255u;return AF1(b);} | ||
| 1667 | AF1 ABuc1FromU1(AU1 i){AU1 b=(i>> 8)&255u;return AF1(b);} | ||
| 1668 | AF1 ABuc2FromU1(AU1 i){AU1 b=(i>>16)&255u;return AF1(b);} | ||
| 1669 | AF1 ABuc3FromU1(AU1 i){AU1 b=(i>>24)&255u;return AF1(b);} | ||
| 1670 | #endif | ||
| 1671 | //============================================================================================================================== | ||
| 1672 | #ifdef A_HALF | ||
| 1673 | // Given the pairs {x0,x1} and {y0,y1}, scales both by 1/32768 and packs them as {{x0,y0},{x1,y1}}. | ||
| 1674 | AW2 ABuc01ToW2(AH2 x,AH2 y){AU1 xb=AU1_AW2(AW2_AH2(x*AH2_(1.0/32768.0)));AU1 yb=AU1_AW2(AW2_AH2(y*AH2_(1.0/32768.0))); | ||
| 1675 | return AW2_AU1(APermGCEA(AU2(xb,yb)));} | ||
| 1676 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1677 | // SOA to AOS plus conversion, designed to take 3 ops: one scale, then one byte permute per output dword. | ||
| 1678 | AU2 ABuc0ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0);AU1 p=AU1_AW2(AW2_AH2(s)); | ||
| 1679 | AU1 rx=APermHGFA(AU2(d.x,p));AU1 ry=APermHGFC(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1680 | AU2 ABuc1ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0);AU1 p=AU1_AW2(AW2_AH2(s)); | ||
| 1681 | AU1 rx=APermHGAE(AU2(d.x,p));AU1 ry=APermHGCE(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1682 | AU2 ABuc2ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0);AU1 p=AU1_AW2(AW2_AH2(s)); | ||
| 1683 | AU1 rx=APermHAFE(AU2(d.x,p));AU1 ry=APermHCFE(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1684 | AU2 ABuc3ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0);AU1 p=AU1_AW2(AW2_AH2(s)); | ||
| 1685 | AU1 rx=APermAGFE(AU2(d.x,p));AU1 ry=APermCGFE(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1686 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1687 | // AOS to SOA plus conversion, designed to take 2 ops: one byte permute, then one scale by 32768. | ||
| 1688 | AH2 ABuc0FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0E0A(i)));return h*AH2_(32768.0);} | ||
| 1689 | AH2 ABuc1FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0F0B(i)));return h*AH2_(32768.0);} | ||
| 1690 | AH2 ABuc2FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0G0C(i)));return h*AH2_(32768.0);} | ||
| 1691 | AH2 ABuc3FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0H0D(i)));return h*AH2_(32768.0);} | ||
| 1692 | #endif | ||
| 1693 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1694 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1695 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1696 | //============================================================================================================================== | ||
| 1697 | // [BSC] BYTE SIGNED CONVERSION | ||
| 1698 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1699 | // Similar to [BUC]. | ||
| 1700 | // Works on a range of {-/+ A_BSC_<32,16>}, for <32-bit, and 16-bit> respectively. | ||
| 1701 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1702 | // ENCODING (without zero-based encoding) | ||
| 1703 | // ======== | ||
| 1704 | // 0 = unused (can be used to mean something else) | ||
| 1705 | // 1 = lowest value | ||
| 1706 | // 128 = exact zero center (zero based encoding) | ||
| 1707 | // 255 = highest value | ||
| 1708 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1709 | // Zero-based [Zb] flips the MSB bit of the byte (making 128 "exact zero" actually zero). | ||
| 1710 | // This is useful if there is a desire for cleared values to decode as zero. | ||
| 1711 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1712 | // BYTE : FLOAT - ABsc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32. | ||
| 1713 | // ==== ===== | ||
| 1714 | // 0 : -128/512 (unused) | ||
| 1715 | // 1 : -127/512 | ||
| 1716 | // 2 : -126/512 | ||
| 1717 | // ... | ||
| 1718 | // 128 : 0 | ||
| 1719 | // ... | ||
| 1720 | // 255 : 127/512 | ||
| 1721 | // : 1/4 (just outside the encoding range) | ||
| 1722 | //============================================================================================================================== | ||
| 1723 | // Largest magnitude (the range is -/+ this value) of the 32-bit (U1) and 16-bit (U2) byte-signed conversions. | ||
| 1724 | #define A_BSC_32 (127.0) | ||
| 1725 | #define A_BSC_16 (127.0/512.0) | ||
| 1726 | //============================================================================================================================== | ||
| 1727 | #if 1 | ||
| 1728 | AU1 ABsc0ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0xffffff00u)|((b )&(0x000000ffu));} | ||
| 1729 | AU1 ABsc1ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0xffff00ffu)|((b<< 8)&(0x0000ff00u));} | ||
| 1730 | AU1 ABsc2ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0xff00ffffu)|((b<<16)&(0x00ff0000u));} | ||
| 1731 | AU1 ABsc3ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0x00ffffffu)|((b<<24)&(0xff000000u));} | ||
| 1732 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1733 | AU1 ABsc0ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);AU1 r=(d&0xffffff00u)|((b )&(0x000000ffu));return r^0x00000080u;} | ||
| 1734 | AU1 ABsc1ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);AU1 r=(d&0xffff00ffu)|((b<< 8)&(0x0000ff00u));return r^0x00008000u;} | ||
| 1735 | AU1 ABsc2ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);AU1 r=(d&0xff00ffffu)|((b<<16)&(0x00ff0000u));return r^0x00800000u;} | ||
| 1736 | AU1 ABsc3ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);AU1 r=(d&0x00ffffffu)|((b<<24)&(0xff000000u));return r^0x80000000u;} | ||
| 1737 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1738 | AF1 ABsc0FromU1(AU1 i){AU1 b=(i )&255u;return AF1(b)-128.0;} | ||
| 1739 | AF1 ABsc1FromU1(AU1 i){AU1 b=(i>> 8)&255u;return AF1(b)-128.0;} | ||
| 1740 | AF1 ABsc2FromU1(AU1 i){AU1 b=(i>>16)&255u;return AF1(b)-128.0;} | ||
| 1741 | AF1 ABsc3FromU1(AU1 i){AU1 b=(i>>24)&255u;return AF1(b)-128.0;} | ||
| 1742 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1743 | AF1 ABsc0FromZbU1(AU1 i){AU1 b=((i )&255u)^0x80u;return AF1(b)-128.0;} | ||
| 1744 | AF1 ABsc1FromZbU1(AU1 i){AU1 b=((i>> 8)&255u)^0x80u;return AF1(b)-128.0;} | ||
| 1745 | AF1 ABsc2FromZbU1(AU1 i){AU1 b=((i>>16)&255u)^0x80u;return AF1(b)-128.0;} | ||
| 1746 | AF1 ABsc3FromZbU1(AU1 i){AU1 b=((i>>24)&255u)^0x80u;return AF1(b)-128.0;} | ||
| 1747 | #endif | ||
| 1748 | //============================================================================================================================== | ||
| 1749 | #ifdef A_HALF | ||
| 1750 | // Given the pairs {x0,x1} and {y0,y1}, scales and biases both, then packs them as {{x0,y0},{x1,y1}}. | ||
| 1751 | AW2 ABsc01ToW2(AH2 x,AH2 y){AU1 xb=AU1_AW2(AW2_AH2(x*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)));AU1 yb=AU1_AW2(AW2_AH2(y*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); | ||
| 1752 | return AW2_AU1(APermGCEA(AU2(xb,yb)));} | ||
| 1753 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1754 | AU2 ABsc0ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s)); | ||
| 1755 | AU1 rx=APermHGFA(AU2(d.x,p));AU1 ry=APermHGFC(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1756 | AU2 ABsc1ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s)); | ||
| 1757 | AU1 rx=APermHGAE(AU2(d.x,p));AU1 ry=APermHGCE(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1758 | AU2 ABsc2ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s)); | ||
| 1759 | AU1 rx=APermHAFE(AU2(d.x,p));AU1 ry=APermHCFE(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1760 | AU2 ABsc3ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s)); | ||
| 1761 | AU1 rx=APermAGFE(AU2(d.x,p));AU1 ry=APermCGFE(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1762 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1763 | AU2 ABsc0ToZbU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s))^0x00800080u; | ||
| 1764 | AU1 rx=APermHGFA(AU2(d.x,p));AU1 ry=APermHGFC(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1765 | AU2 ABsc1ToZbU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s))^0x00800080u; | ||
| 1766 | AU1 rx=APermHGAE(AU2(d.x,p));AU1 ry=APermHGCE(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1767 | AU2 ABsc2ToZbU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s))^0x00800080u; | ||
| 1768 | AU1 rx=APermHAFE(AU2(d.x,p));AU1 ry=APermHCFE(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1769 | AU2 ABsc3ToZbU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s))^0x00800080u; | ||
| 1770 | AU1 rx=APermAGFE(AU2(d.x,p));AU1 ry=APermCGFE(AU2(d.y,p));return AU2(rx,ry);} | ||
| 1771 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1772 | AH2 ABsc0FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0E0A(i)));return h*AH2_(32768.0)-AH2_(0.25);} | ||
| 1773 | AH2 ABsc1FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0F0B(i)));return h*AH2_(32768.0)-AH2_(0.25);} | ||
| 1774 | AH2 ABsc2FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0G0C(i)));return h*AH2_(32768.0)-AH2_(0.25);} | ||
| 1775 | AH2 ABsc3FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0H0D(i)));return h*AH2_(32768.0)-AH2_(0.25);} | ||
| 1776 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1777 | AH2 ABsc0FromZbU2(AU2 i){AU1 p=APerm0E0A(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);} | ||
| 1778 | AH2 ABsc1FromZbU2(AU2 i){AU1 p=APerm0F0B(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);} | ||
| 1779 | AH2 ABsc2FromZbU2(AU2 i){AU1 p=APerm0G0C(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);} | ||
| 1780 | AH2 ABsc3FromZbU2(AU2 i){AU1 p=APerm0H0D(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);} | ||
| 1781 | #endif | ||
| 1782 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1783 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1784 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1785 | //============================================================================================================================== | ||
| 1786 | // HALF APPROXIMATIONS | ||
| 1787 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1788 | // These support only positive inputs. | ||
| 1789 | // Did not see value yet in specialization for range. | ||
| 1790 | // Using quick testing, ended up mostly getting the same "best" approximation for various ranges. | ||
| 1791 | // With hardware that can co-execute transcendentals, the value in approximations could be less than expected. | ||
| 1792 | // However from a latency perspective, if execution of a transcendental is 4 clk, with no packed support, -> 8 clk total. | ||
| 1793 | // And co-execution would require a compiler interleaving a lot of independent work for packed usage. | ||
| 1794 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1795 | // The one Newton Raphson iteration form of rsq() was skipped (requires 6 ops total). | ||
| 1796 | // Same with sqrt(), as this could be x*rsq() (7 ops). | ||
| 1797 | //============================================================================================================================== | ||
| 1798 | #ifdef A_HALF | ||
| 1799 | // Two-op square root estimate, tuned to minimize squared error across the full positive range. | ||
| 1800 | // With the 0x1de2 constant, inputs in {0 to 1} map to outputs < 1. | ||
| 1801 | AH1 APrxLoSqrtH1(AH1 a){AW1 h=AW1_AH1(a)>>AW1_(1);return AH1_AW1(h+AW1_(0x1de2));} | ||
| 1802 | AH2 APrxLoSqrtH2(AH2 a){AW2 h=AW2_AH2(a)>>AW2_(1);return AH2_AW2(h+AW2_(0x1de2));} | ||
| 1803 | AH3 APrxLoSqrtH3(AH3 a){AW3 h=AW3_AH3(a)>>AW3_(1);return AH3_AW3(h+AW3_(0x1de2));} | ||
| 1804 | AH4 APrxLoSqrtH4(AH4 a){AW4 h=AW4_AH4(a)>>AW4_(1);return AH4_AW4(h+AW4_(0x1de2));} | ||
| 1805 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1806 | // One-op, lower precision reciprocal estimate. | ||
| 1807 | // Tuned to minimize squared error across {smallest normal to 16384.0}. | ||
| 1808 | AH1 APrxLoRcpH1(AH1 a){AW1 r=AW1_(0x7784)-AW1_AH1(a);return AH1_AW1(r);} | ||
| 1809 | AH2 APrxLoRcpH2(AH2 a){AW2 r=AW2_(0x7784)-AW2_AH2(a);return AH2_AW2(r);} | ||
| 1810 | AH3 APrxLoRcpH3(AH3 a){AW3 r=AW3_(0x7784)-AW3_AH3(a);return AH3_AW3(r);} | ||
| 1811 | AH4 APrxLoRcpH4(AH4 a){AW4 r=AW4_(0x7784)-AW4_AH4(a);return AH4_AW4(r);} | ||
| 1812 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1813 | // Three-op, medium precision reciprocal: integer-trick seed refined by one Newton Raphson iteration. | ||
| 1814 | AH1 APrxMedRcpH1(AH1 a){AH1 r=AH1_AW1(AW1_(0x778d)-AW1_AH1(a));AH1 t=-r*a+AH1_(2.0);return r*t;} | ||
| 1815 | AH2 APrxMedRcpH2(AH2 a){AH2 r=AH2_AW2(AW2_(0x778d)-AW2_AH2(a));AH2 t=-r*a+AH2_(2.0);return r*t;} | ||
| 1816 | AH3 APrxMedRcpH3(AH3 a){AH3 r=AH3_AW3(AW3_(0x778d)-AW3_AH3(a));AH3 t=-r*a+AH3_(2.0);return r*t;} | ||
| 1817 | AH4 APrxMedRcpH4(AH4 a){AH4 r=AH4_AW4(AW4_(0x778d)-AW4_AH4(a));AH4 t=-r*a+AH4_(2.0);return r*t;} | ||
| 1818 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1819 | // Two-op reciprocal square root estimate, tuned to minimize squared error across {smallest normal to 16384.0}. | ||
| 1820 | AH1 APrxLoRsqH1(AH1 a){AW1 h=AW1_AH1(a)>>AW1_(1);return AH1_AW1(AW1_(0x59a3)-h);} | ||
| 1821 | AH2 APrxLoRsqH2(AH2 a){AW2 h=AW2_AH2(a)>>AW2_(1);return AH2_AW2(AW2_(0x59a3)-h);} | ||
| 1822 | AH3 APrxLoRsqH3(AH3 a){AW3 h=AW3_AH3(a)>>AW3_(1);return AH3_AW3(AW3_(0x59a3)-h);} | ||
| 1823 | AH4 APrxLoRsqH4(AH4 a){AW4 h=AW4_AH4(a)>>AW4_(1);return AH4_AW4(AW4_(0x59a3)-h);} | ||
| 1824 | #endif | ||
| 1825 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1826 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1827 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1828 | //============================================================================================================================== | ||
| 1829 | // FLOAT APPROXIMATIONS | ||
| 1830 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1831 | // Michal Drobot has an excellent presentation on these: "Low Level Optimizations For GCN", | ||
| 1832 | // - Idea dates back to SGI, then to Quake 3, etc. | ||
| 1833 | // - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf | ||
| 1834 | // - sqrt(x)=rsqrt(x)*x | ||
| 1835 | // - rcp(x)=rsqrt(x)*rsqrt(x) for positive x | ||
| 1836 | // - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h | ||
| 1837 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1838 | // These below are from perhaps less complete searching for optimal. | ||
| 1839 | // Used FP16 normal range for testing with +4096 32-bit step size for sampling error. | ||
| 1840 | // So these match up well with the half approximations. | ||
| 1841 | //============================================================================================================================== | ||
| 1842 | AF1 APrxLoSqrtF1(AF1 a){AU1 h=AU1_AF1(a)>>AU1_(1);return AF1_AU1(h+AU1_(0x1fbc4639));} | ||
| 1843 | AF1 APrxLoRcpF1(AF1 a){AU1 r=AU1_(0x7ef07ebb)-AU1_AF1(a);return AF1_AU1(r);} | ||
| 1844 | AF1 APrxMedRcpF1(AF1 a){AF1 r=AF1_AU1(AU1_(0x7ef19fff)-AU1_AF1(a));AF1 t=-r*a+AF1_(2.0);return r*t;} | ||
| 1845 | AF1 APrxLoRsqF1(AF1 a){AU1 h=AU1_AF1(a)>>AU1_(1);return AF1_AU1(AU1_(0x5f347d74)-h);} | ||
| 1846 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1847 | AF2 APrxLoSqrtF2(AF2 a){AU2 h=AU2_AF2(a)>>AU2_(1);return AF2_AU2(h+AU2_(0x1fbc4639));} | ||
| 1848 | AF2 APrxLoRcpF2(AF2 a){AU2 r=AU2_(0x7ef07ebb)-AU2_AF2(a);return AF2_AU2(r);} | ||
| 1849 | AF2 APrxMedRcpF2(AF2 a){AF2 r=AF2_AU2(AU2_(0x7ef19fff)-AU2_AF2(a));AF2 t=-r*a+AF2_(2.0);return r*t;} | ||
| 1850 | AF2 APrxLoRsqF2(AF2 a){AU2 h=AU2_AF2(a)>>AU2_(1);return AF2_AU2(AU2_(0x5f347d74)-h);} | ||
| 1851 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1852 | AF3 APrxLoSqrtF3(AF3 a){AU3 h=AU3_AF3(a)>>AU3_(1);return AF3_AU3(h+AU3_(0x1fbc4639));} | ||
| 1853 | AF3 APrxLoRcpF3(AF3 a){AU3 r=AU3_(0x7ef07ebb)-AU3_AF3(a);return AF3_AU3(r);} | ||
| 1854 | AF3 APrxMedRcpF3(AF3 a){AF3 r=AF3_AU3(AU3_(0x7ef19fff)-AU3_AF3(a));AF3 t=-r*a+AF3_(2.0);return r*t;} | ||
| 1855 | AF3 APrxLoRsqF3(AF3 a){AU3 h=AU3_AF3(a)>>AU3_(1);return AF3_AU3(AU3_(0x5f347d74)-h);} | ||
| 1856 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1857 | AF4 APrxLoSqrtF4(AF4 a){AU4 h=AU4_AF4(a)>>AU4_(1);return AF4_AU4(h+AU4_(0x1fbc4639));} | ||
| 1858 | AF4 APrxLoRcpF4(AF4 a){AU4 r=AU4_(0x7ef07ebb)-AU4_AF4(a);return AF4_AU4(r);} | ||
| 1859 | AF4 APrxMedRcpF4(AF4 a){AF4 r=AF4_AU4(AU4_(0x7ef19fff)-AU4_AF4(a));AF4 t=-r*a+AF4_(2.0);return r*t;} | ||
| 1860 | AF4 APrxLoRsqF4(AF4 a){AU4 h=AU4_AF4(a)>>AU4_(1);return AF4_AU4(AU4_(0x5f347d74)-h);} | ||
| 1861 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1862 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1863 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1864 | //============================================================================================================================== | ||
| 1865 | // PQ APPROXIMATIONS | ||
| 1866 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1867 | // PQ is very close to x^(1/8). The functions below use the fast float approximation method to do | ||
| 1868 | // PQ<~>Gamma2 (4th power and fast 4th root) and PQ<~>Linear (8th power and fast 8th root). Maximum error is ~0.2%. | ||
| 1869 | //============================================================================================================================== | ||
| 1870 | // Helpers: Quart(a) is a^4 and Oct(a) is a^8, both built from repeated squaring. | ||
| 1871 | AF1 Quart(AF1 a) { AF1 a2 = a * a; return a2 * a2; } | ||
| 1872 | AF1 Oct(AF1 a) { AF1 a2 = a * a; AF1 a4 = a2 * a2; return a4 * a4; } | ||
| 1873 | AF2 Quart(AF2 a) { AF2 a2 = a * a; return a2 * a2; } | ||
| 1874 | AF2 Oct(AF2 a) { AF2 a2 = a * a; AF2 a4 = a2 * a2; return a4 * a4; } | ||
| 1875 | AF3 Quart(AF3 a) { AF3 a2 = a * a; return a2 * a2; } | ||
| 1876 | AF3 Oct(AF3 a) { AF3 a2 = a * a; AF3 a4 = a2 * a2; return a4 * a4; } | ||
| 1877 | AF4 Quart(AF4 a) { AF4 a2 = a * a; return a2 * a2; } | ||
| 1878 | AF4 Oct(AF4 a) { AF4 a2 = a * a; AF4 a4 = a2 * a2; return a4 * a4; } | ||
| 1879 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1880 | AF1 APrxPQToGamma2(AF1 a) { AF1 g = Quart(a); return g; } | ||
| 1881 | AF1 APrxPQToLinear(AF1 a) { AF1 l = Oct(a); return l; } | ||
| 1882 | AF1 APrxLoGamma2ToPQ(AF1 a) { AU1 q = AU1_AF1(a) >> AU1_(2); return AF1_AU1(q + AU1_(0x2F9A4E46)); } | ||
| 1883 | AF1 APrxMedGamma2ToPQ(AF1 a) { AF1 r = AF1_AU1((AU1_AF1(a) >> AU1_(2)) + AU1_(0x2F9A4E46)); AF1 r4 = Quart(r); AF1 num = r * (r4 - a); AF1 den = AF1_(4.0) * r4; return r - num / den; } | ||
| 1884 | AF1 APrxHighGamma2ToPQ(AF1 a) { AF1 s = sqrt(a); return sqrt(s); } | ||
| 1885 | AF1 APrxLoLinearToPQ(AF1 a) { AU1 q = AU1_AF1(a) >> AU1_(3); return AF1_AU1(q + AU1_(0x378D8723)); } | ||
| 1886 | AF1 APrxMedLinearToPQ(AF1 a) { AF1 r = AF1_AU1((AU1_AF1(a) >> AU1_(3)) + AU1_(0x378D8723)); AF1 r8 = Oct(r); AF1 num = r * (r8 - a); AF1 den = AF1_(8.0) * r8; return r - num / den; } | ||
| 1887 | AF1 APrxHighLinearToPQ(AF1 a) { AF1 s = sqrt(a); AF1 t = sqrt(s); return sqrt(t); } | ||
| 1888 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1889 | AF2 APrxPQToGamma2(AF2 a) { AF2 g = Quart(a); return g; } | ||
| 1890 | AF2 APrxPQToLinear(AF2 a) { AF2 l = Oct(a); return l; } | ||
| 1891 | AF2 APrxLoGamma2ToPQ(AF2 a) { AU2 q = AU2_AF2(a) >> AU2_(2); return AF2_AU2(q + AU2_(0x2F9A4E46)); } | ||
| 1892 | AF2 APrxMedGamma2ToPQ(AF2 a) { AF2 r = AF2_AU2((AU2_AF2(a) >> AU2_(2)) + AU2_(0x2F9A4E46)); AF2 r4 = Quart(r); AF2 num = r * (r4 - a); AF2 den = AF1_(4.0) * r4; return r - num / den; } | ||
| 1893 | AF2 APrxHighGamma2ToPQ(AF2 a) { AF2 s = sqrt(a); return sqrt(s); } | ||
| 1894 | AF2 APrxLoLinearToPQ(AF2 a) { AU2 q = AU2_AF2(a) >> AU2_(3); return AF2_AU2(q + AU2_(0x378D8723)); } | ||
| 1895 | AF2 APrxMedLinearToPQ(AF2 a) { AF2 r = AF2_AU2((AU2_AF2(a) >> AU2_(3)) + AU2_(0x378D8723)); AF2 r8 = Oct(r); AF2 num = r * (r8 - a); AF2 den = AF1_(8.0) * r8; return r - num / den; } | ||
| 1896 | AF2 APrxHighLinearToPQ(AF2 a) { AF2 s = sqrt(a); AF2 t = sqrt(s); return sqrt(t); } | ||
| 1897 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1898 | AF3 APrxPQToGamma2(AF3 a) { AF3 g = Quart(a); return g; } | ||
| 1899 | AF3 APrxPQToLinear(AF3 a) { AF3 l = Oct(a); return l; } | ||
| 1900 | AF3 APrxLoGamma2ToPQ(AF3 a) { AU3 q = AU3_AF3(a) >> AU3_(2); return AF3_AU3(q + AU3_(0x2F9A4E46)); } | ||
| 1901 | AF3 APrxMedGamma2ToPQ(AF3 a) { AF3 r = AF3_AU3((AU3_AF3(a) >> AU3_(2)) + AU3_(0x2F9A4E46)); AF3 r4 = Quart(r); AF3 num = r * (r4 - a); AF3 den = AF1_(4.0) * r4; return r - num / den; } | ||
| 1902 | AF3 APrxHighGamma2ToPQ(AF3 a) { AF3 s = sqrt(a); return sqrt(s); } | ||
| 1903 | AF3 APrxLoLinearToPQ(AF3 a) { AU3 q = AU3_AF3(a) >> AU3_(3); return AF3_AU3(q + AU3_(0x378D8723)); } | ||
| 1904 | AF3 APrxMedLinearToPQ(AF3 a) { AF3 r = AF3_AU3((AU3_AF3(a) >> AU3_(3)) + AU3_(0x378D8723)); AF3 r8 = Oct(r); AF3 num = r * (r8 - a); AF3 den = AF1_(8.0) * r8; return r - num / den; } | ||
| 1905 | AF3 APrxHighLinearToPQ(AF3 a) { AF3 s = sqrt(a); AF3 t = sqrt(s); return sqrt(t); } | ||
| 1906 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1907 | AF4 APrxPQToGamma2(AF4 a) { AF4 g = Quart(a); return g; } | ||
| 1908 | AF4 APrxPQToLinear(AF4 a) { AF4 l = Oct(a); return l; } | ||
| 1909 | AF4 APrxLoGamma2ToPQ(AF4 a) { AU4 q = AU4_AF4(a) >> AU4_(2); return AF4_AU4(q + AU4_(0x2F9A4E46)); } | ||
| 1910 | AF4 APrxMedGamma2ToPQ(AF4 a) { AF4 r = AF4_AU4((AU4_AF4(a) >> AU4_(2)) + AU4_(0x2F9A4E46)); AF4 r4 = Quart(r); AF4 num = r * (r4 - a); AF4 den = AF1_(4.0) * r4; return r - num / den; } | ||
| 1911 | AF4 APrxHighGamma2ToPQ(AF4 a) { AF4 s = sqrt(a); return sqrt(s); } | ||
| 1912 | AF4 APrxLoLinearToPQ(AF4 a) { AU4 q = AU4_AF4(a) >> AU4_(3); return AF4_AU4(q + AU4_(0x378D8723)); } | ||
| 1913 | AF4 APrxMedLinearToPQ(AF4 a) { AF4 r = AF4_AU4((AU4_AF4(a) >> AU4_(3)) + AU4_(0x378D8723)); AF4 r8 = Oct(r); AF4 num = r * (r8 - a); AF4 den = AF1_(8.0) * r8; return r - num / den; } | ||
| 1914 | AF4 APrxHighLinearToPQ(AF4 a) { AF4 s = sqrt(a); AF4 t = sqrt(s); return sqrt(t); } | ||
| 1915 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1916 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1917 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1918 | //============================================================================================================================== | ||
| 1919 | // PARABOLIC SIN & COS | ||
| 1920 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1921 | // Approximate answers to transcendental questions. | ||
| 1922 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1923 | //============================================================================================================================== | ||
| 1924 | #if 1 | ||
| 1925 | // Input must lie in {-1 to 1}, which represents {0 to 2 pi}. | ||
| 1926 | // Output lies in {-1/4 to 1/4}, which represents {-1 to 1}. | ||
| 1927 | AF1 APSinF1(AF1 x){AF1 m=abs(x);return x*m-x;} // MAD. | ||
| 1928 | AF2 APSinF2(AF2 x){AF2 m=abs(x);return x*m-x;} | ||
| 1929 | AF1 APCosF1(AF1 x){AF1 f=AFractF1(x*AF1_(0.5)+AF1_(0.75));AF1 s=f*AF1_(2.0)-AF1_(1.0);return APSinF1(s);} // 3x MAD, FRACT | ||
| 1930 | AF2 APCosF2(AF2 x){AF2 f=AFractF2(x*AF2_(0.5)+AF2_(0.75));AF2 s=f*AF2_(2.0)-AF2_(1.0);return APSinF2(s);} | ||
| 1931 | AF2 APSinCosF1(AF1 x){AF1 f=AFractF1(x*AF1_(0.5)+AF1_(0.75));AF1 c=f*AF1_(2.0)-AF1_(1.0);return APSinF2(AF2(x,c));} | ||
| 1932 | #endif | ||
| 1933 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1934 | #ifdef A_HALF | ||
| 1935 | // Cost of a packed {sin,cos} pair: | ||
| 1936 | // - Native: 16 clocks and 4 issue slots (there are no packed transcendentals). | ||
| 1937 | // - Parabolic: 8 clocks and 8 issue slots (fract is the only non-packed op). | ||
| 1938 | AH1 APSinH1(AH1 x){AH1 m=abs(x);return x*m-x;} | ||
| 1939 | AH2 APSinH2(AH2 x){AH2 m=abs(x);return x*m-x;} // AND,FMA | ||
| 1940 | AH1 APCosH1(AH1 x){AH1 f=AFractH1(x*AH1_(0.5)+AH1_(0.75));AH1 s=f*AH1_(2.0)-AH1_(1.0);return APSinH1(s);} | ||
| 1941 | AH2 APCosH2(AH2 x){AH2 f=AFractH2(x*AH2_(0.5)+AH2_(0.75));AH2 s=f*AH2_(2.0)-AH2_(1.0);return APSinH2(s);} // 3x FMA, 2xFRACT, AND | ||
| 1942 | AH2 APSinCosH1(AH1 x){AH1 f=AFractH1(x*AH1_(0.5)+AH1_(0.75));AH1 c=f*AH1_(2.0)-AH1_(1.0);return APSinH2(AH2(x,c));} | ||
| 1943 | #endif | ||
| 1944 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1945 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1946 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1947 | //============================================================================================================================== | ||
| 1948 | // [ZOL] ZERO ONE LOGIC | ||
| 1949 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1950 | // Conditional free logic designed for easy 16-bit packing, and backwards porting to 32-bit. | ||
| 1951 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1952 | // 0 := false | ||
| 1953 | // 1 := true | ||
| 1954 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1955 | // AndNot(x,y) -> !(x&y) .... One op. | ||
| 1956 | // AndOr(x,y,z) -> (x&y)|z ... One op. | ||
| 1957 | // GtZero(x) -> x>0.0 ..... One op. | ||
| 1958 | // Sel(x,y,z) -> x?y:z ..... Two ops, has no precision loss. | ||
| 1959 | // Signed(x) -> x<0.0 ..... One op. | ||
| 1960 | // ZeroPass(x,y) -> x?0:y ..... Two ops, 'y' is a pass through safe for aliasing as integer. | ||
| 1961 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1962 | // OPTIMIZATION NOTES | ||
| 1963 | // ================== | ||
| 1964 | // - On Vega to use 2 constants in a packed op, pass in as one AW2 or one AH2 'k.xy' and use as 'k.xx' and 'k.yy'. | ||
| 1965 | // For example 'a.xy*k.xx+k.yy'. | ||
| 1966 | //============================================================================================================================== | ||
| 1967 | #if 1 | ||
| 1968 | AU1 AZolAndU1(AU1 x,AU1 y){AU1 r=min(x,y);return r;} | ||
| 1969 | AU2 AZolAndU2(AU2 x,AU2 y){AU2 r=min(x,y);return r;} | ||
| 1970 | AU3 AZolAndU3(AU3 x,AU3 y){AU3 r=min(x,y);return r;} | ||
| 1971 | AU4 AZolAndU4(AU4 x,AU4 y){AU4 r=min(x,y);return r;} | ||
| 1972 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1973 | AU1 AZolNotU1(AU1 x){AU1 one=AU1_(1);return x^one;} | ||
| 1974 | AU2 AZolNotU2(AU2 x){AU2 one=AU2_(1);return x^one;} | ||
| 1975 | AU3 AZolNotU3(AU3 x){AU3 one=AU3_(1);return x^one;} | ||
| 1976 | AU4 AZolNotU4(AU4 x){AU4 one=AU4_(1);return x^one;} | ||
| 1977 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1978 | AU1 AZolOrU1(AU1 x,AU1 y){return max(x,y);} | ||
| 1979 | AU2 AZolOrU2(AU2 x,AU2 y){return max(x,y);} | ||
| 1980 | AU3 AZolOrU3(AU3 x,AU3 y){return max(x,y);} | ||
| 1981 | AU4 AZolOrU4(AU4 x,AU4 y){return max(x,y);} | ||
| 1982 | //============================================================================================================================== | ||
| 1983 | AU1 AZolF1ToU1(AF1 x){return AU1(x);} /* Float zero-one value to integer zero-one value using the AU* constructor-style conversion. */ | ||
| 1984 | AU2 AZolF2ToU2(AF2 x){return AU2(x);} | ||
| 1985 | AU3 AZolF3ToU3(AF3 x){return AU3(x);} | ||
| 1986 | AU4 AZolF4ToU4(AF4 x){return AU4(x);} | ||
| 1987 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1988 | // 2 ops, denormals don't work in 32-bit on PC (and if they are enabled, OMOD is disabled). | ||
| 1989 | AU1 AZolNotF1ToU1(AF1 x){return AU1(AF1_(1.0)-x);} /* Logical NOT fused with the conversion: converts (1.0-x) to integer. */ | ||
| 1990 | AU2 AZolNotF2ToU2(AF2 x){return AU2(AF2_(1.0)-x);} | ||
| 1991 | AU3 AZolNotF3ToU3(AF3 x){return AU3(AF3_(1.0)-x);} | ||
| 1992 | AU4 AZolNotF4ToU4(AF4 x){return AU4(AF4_(1.0)-x);} | ||
| 1993 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1994 | AF1 AZolU1ToF1(AU1 x){return AF1(x);} /* Integer zero-one value to float zero-one value using the AF* constructor-style conversion. */ | ||
| 1995 | AF2 AZolU2ToF2(AU2 x){return AF2(x);} | ||
| 1996 | AF3 AZolU3ToF3(AU3 x){return AF3(x);} | ||
| 1997 | AF4 AZolU4ToF4(AU4 x){return AF4(x);} | ||
| 1998 | //============================================================================================================================== | ||
| 1999 | AF1 AZolAndF1(AF1 x,AF1 y){return min(x,y);} /* AND on {0.0,1.0} floats: min() is 1.0 only when both inputs are 1.0. */ | ||
| 2000 | AF2 AZolAndF2(AF2 x,AF2 y){return min(x,y);} | ||
| 2001 | AF3 AZolAndF3(AF3 x,AF3 y){return min(x,y);} | ||
| 2002 | AF4 AZolAndF4(AF4 x,AF4 y){return min(x,y);} | ||
| 2003 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2004 | AF1 ASolAndNotF1(AF1 x,AF1 y){return (-x)*y+AF1_(1.0);} /* AndNot: 1.0-x*y, i.e. !(x&y). NOTE(review): 'ASol' prefix differs from the sibling 'AZol' names - possibly a typo; callers may depend on it. */ | ||
| 2005 | AF2 ASolAndNotF2(AF2 x,AF2 y){return (-x)*y+AF2_(1.0);} | ||
| 2006 | AF3 ASolAndNotF3(AF3 x,AF3 y){return (-x)*y+AF3_(1.0);} | ||
| 2007 | AF4 ASolAndNotF4(AF4 x,AF4 y){return (-x)*y+AF4_(1.0);} | ||
| 2008 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2009 | AF1 AZolAndOrF1(AF1 x,AF1 y,AF1 z){return ASatF1(x*y+z);} /* AndOr: (x&y)|z as x*y+z passed through ASat* (defined elsewhere; presumably saturates so 1+1 stays 1). */ | ||
| 2010 | AF2 AZolAndOrF2(AF2 x,AF2 y,AF2 z){return ASatF2(x*y+z);} | ||
| 2011 | AF3 AZolAndOrF3(AF3 x,AF3 y,AF3 z){return ASatF3(x*y+z);} | ||
| 2012 | AF4 AZolAndOrF4(AF4 x,AF4 y,AF4 z){return ASatF4(x*y+z);} | ||
| 2013 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2014 | AF1 AZolGtZeroF1(AF1 x){return ASatF1(x*AF1_(A_INFP_F));} /* x>0.0 test: scales by A_INFP_F then saturates; per the IEEE notes in this file 0*INF=NaN and saturate(NaN)=0, so x==0 yields 0. */ | ||
| 2015 | AF2 AZolGtZeroF2(AF2 x){return ASatF2(x*AF2_(A_INFP_F));} | ||
| 2016 | AF3 AZolGtZeroF3(AF3 x){return ASatF3(x*AF3_(A_INFP_F));} | ||
| 2017 | AF4 AZolGtZeroF4(AF4 x){return ASatF4(x*AF4_(A_INFP_F));} | ||
| 2018 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2019 | AF1 AZolNotF1(AF1 x){return AF1_(1.0)-x;} /* NOT on {0.0,1.0} floats: 1.0-x. */ | ||
| 2020 | AF2 AZolNotF2(AF2 x){return AF2_(1.0)-x;} | ||
| 2021 | AF3 AZolNotF3(AF3 x){return AF3_(1.0)-x;} | ||
| 2022 | AF4 AZolNotF4(AF4 x){return AF4_(1.0)-x;} | ||
| 2023 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2024 | AF1 AZolOrF1(AF1 x,AF1 y){return max(x,y);} /* OR on {0.0,1.0} floats: max() is 1.0 when either input is 1.0. */ | ||
| 2025 | AF2 AZolOrF2(AF2 x,AF2 y){return max(x,y);} | ||
| 2026 | AF3 AZolOrF3(AF3 x,AF3 y){return max(x,y);} | ||
| 2027 | AF4 AZolOrF4(AF4 x,AF4 y){return max(x,y);} | ||
| 2028 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2029 | AF1 AZolSelF1(AF1 x,AF1 y,AF1 z){AF1 r=(-x)*z+z;return x*y+r;} /* Select x?y:z: r=z-x*z is z when x==0 and 0 when x==1; adding x*y then contributes y only when x==1. */ | ||
| 2030 | AF2 AZolSelF2(AF2 x,AF2 y,AF2 z){AF2 r=(-x)*z+z;return x*y+r;} | ||
| 2031 | AF3 AZolSelF3(AF3 x,AF3 y,AF3 z){AF3 r=(-x)*z+z;return x*y+r;} | ||
| 2032 | AF4 AZolSelF4(AF4 x,AF4 y,AF4 z){AF4 r=(-x)*z+z;return x*y+r;} | ||
| 2033 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2034 | AF1 AZolSignedF1(AF1 x){return ASatF1(x*AF1_(A_INFN_F));} /* x<0.0 test: scales by A_INFN_F then saturates, so negative x yields 1 and everything else 0 (see the IEEE notes in this file). */ | ||
| 2035 | AF2 AZolSignedF2(AF2 x){return ASatF2(x*AF2_(A_INFN_F));} | ||
| 2036 | AF3 AZolSignedF3(AF3 x){return ASatF3(x*AF3_(A_INFN_F));} | ||
| 2037 | AF4 AZolSignedF4(AF4 x){return ASatF4(x*AF4_(A_INFN_F));} | ||
| 2038 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2039 | AF1 AZolZeroPassF1(AF1 x,AF1 y){return AF1_AU1((AU1_AF1(x)!=AU1_(0))?AU1_(0):AU1_AF1(y));} /* x?0:y done on the integer aliases of x and y, so 'y' passes through unmodified. NOTE(review): the 2/3/4-wide overloads need '!=' and '?:' to act per component - confirm for each target shading language. */ | ||
| 2040 | AF2 AZolZeroPassF2(AF2 x,AF2 y){return AF2_AU2((AU2_AF2(x)!=AU2_(0))?AU2_(0):AU2_AF2(y));} | ||
| 2041 | AF3 AZolZeroPassF3(AF3 x,AF3 y){return AF3_AU3((AU3_AF3(x)!=AU3_(0))?AU3_(0):AU3_AF3(y));} | ||
| 2042 | AF4 AZolZeroPassF4(AF4 x,AF4 y){return AF4_AU4((AU4_AF4(x)!=AU4_(0))?AU4_(0):AU4_AF4(y));} | ||
| 2043 | #endif | ||
| 2044 | //============================================================================================================================== | ||
| 2045 | #ifdef A_HALF | ||
| 2046 | AW1 AZolAndW1(AW1 x,AW1 y){return min(x,y);} /* AND on {0,1} AW* integers (A_HALF builds only): min() is 1 only when both inputs are 1. */ | ||
| 2047 | AW2 AZolAndW2(AW2 x,AW2 y){return min(x,y);} | ||
| 2048 | AW3 AZolAndW3(AW3 x,AW3 y){return min(x,y);} | ||
| 2049 | AW4 AZolAndW4(AW4 x,AW4 y){return min(x,y);} | ||
| 2050 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2051 | AW1 AZolNotW1(AW1 x){return x^AW1_(1);} /* NOT on {0,1} AW* integers: XOR with 1 flips bit 0, so 0<->1. */ | ||
| 2052 | AW2 AZolNotW2(AW2 x){return x^AW2_(1);} | ||
| 2053 | AW3 AZolNotW3(AW3 x){return x^AW3_(1);} | ||
| 2054 | AW4 AZolNotW4(AW4 x){return x^AW4_(1);} | ||
| 2055 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2056 | AW1 AZolOrW1(AW1 x,AW1 y){return max(x,y);} /* OR on {0,1} AW* integers: max() is 1 when either input is 1. */ | ||
| 2057 | AW2 AZolOrW2(AW2 x,AW2 y){return max(x,y);} | ||
| 2058 | AW3 AZolOrW3(AW3 x,AW3 y){return max(x,y);} | ||
| 2059 | AW4 AZolOrW4(AW4 x,AW4 y){return max(x,y);} | ||
| 2060 | //============================================================================================================================== | ||
| 2061 | // Uses denormal trick: 'x' is multiplied by the half whose bit pattern is integer 1 (the first denormal, see 'FP16/INT16 ALIASING DENORMAL'). | ||
| 2062 | AW1 AZolH1ToW1(AH1 x){return AW1_AH1(x*AH1_AW1(AW1_(1)));} /* Half zero-one to AW* zero-one: 1.0*denormal keeps bit pattern 1, 0.0*denormal gives 0; AW*_AH* presumably reinterprets the bits. */ | ||
| 2063 | AW2 AZolH2ToW2(AH2 x){return AW2_AH2(x*AH2_AW2(AW2_(1)));} | ||
| 2064 | AW3 AZolH3ToW3(AH3 x){return AW3_AH3(x*AH3_AW3(AW3_(1)));} | ||
| 2065 | AW4 AZolH4ToW4(AH4 x){return AW4_AH4(x*AH4_AW4(AW4_(1)));} | ||
| 2066 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2067 | // AMD arch lacks a packed conversion opcode. Instead 'x' (0 or 1) scales the integer alias of half 1.0, and the product is aliased back to half. | ||
| 2068 | AH1 AZolW1ToH1(AW1 x){return AH1_AW1(x*AW1_AH1(AH1_(1.0)));} /* AW* zero-one to half zero-one. */ | ||
| 2069 | AH2 AZolW2ToH2(AW2 x){return AH2_AW2(x*AW2_AH2(AH2_(1.0)));} | ||
| 2070 | AH3 AZolW3ToH3(AW3 x){return AH3_AW3(x*AW3_AH3(AH3_(1.0)));} AH3 AZolW1ToH3(AW3 x){return AZolW3ToH3(x);} /* 'AZolW1ToH3' is the original misnamed entry point, kept for existing callers. */ | ||
| 2071 | AH4 AZolW4ToH4(AW4 x){return AH4_AW4(x*AW4_AH4(AH4_(1.0)));} AH4 AZolW2ToH4(AW4 x){return AZolW4ToH4(x);} /* 'AZolW2ToH4' is the original misnamed entry point, kept for existing callers. */ | ||
| 2072 | //============================================================================================================================== | ||
| 2073 | AH1 AZolAndH1(AH1 x,AH1 y){return min(x,y);} /* AND on {0.0,1.0} halves: min() is 1.0 only when both inputs are 1.0. */ | ||
| 2074 | AH2 AZolAndH2(AH2 x,AH2 y){return min(x,y);} | ||
| 2075 | AH3 AZolAndH3(AH3 x,AH3 y){return min(x,y);} | ||
| 2076 | AH4 AZolAndH4(AH4 x,AH4 y){return min(x,y);} | ||
| 2077 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2078 | AH1 ASolAndNotH1(AH1 x,AH1 y){return (-x)*y+AH1_(1.0);} /* AndNot: 1.0-x*y, i.e. !(x&y). NOTE(review): 'ASol' prefix differs from the sibling 'AZol' names - possibly a typo; callers may depend on it. */ | ||
| 2079 | AH2 ASolAndNotH2(AH2 x,AH2 y){return (-x)*y+AH2_(1.0);} | ||
| 2080 | AH3 ASolAndNotH3(AH3 x,AH3 y){return (-x)*y+AH3_(1.0);} | ||
| 2081 | AH4 ASolAndNotH4(AH4 x,AH4 y){return (-x)*y+AH4_(1.0);} | ||
| 2082 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2083 | AH1 AZolAndOrH1(AH1 x,AH1 y,AH1 z){return ASatH1(x*y+z);} /* AndOr: (x&y)|z as x*y+z passed through ASat* (defined elsewhere; presumably saturates so 1+1 stays 1). */ | ||
| 2084 | AH2 AZolAndOrH2(AH2 x,AH2 y,AH2 z){return ASatH2(x*y+z);} | ||
| 2085 | AH3 AZolAndOrH3(AH3 x,AH3 y,AH3 z){return ASatH3(x*y+z);} | ||
| 2086 | AH4 AZolAndOrH4(AH4 x,AH4 y,AH4 z){return ASatH4(x*y+z);} | ||
| 2087 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2088 | AH1 AZolGtZeroH1(AH1 x){return ASatH1(x*AH1_(A_INFP_H));} /* x>0.0 test: scales by A_INFP_H then saturates; per the IEEE notes in this file 0*INF=NaN and saturate(NaN)=0, so x==0 yields 0. */ | ||
| 2089 | AH2 AZolGtZeroH2(AH2 x){return ASatH2(x*AH2_(A_INFP_H));} | ||
| 2090 | AH3 AZolGtZeroH3(AH3 x){return ASatH3(x*AH3_(A_INFP_H));} | ||
| 2091 | AH4 AZolGtZeroH4(AH4 x){return ASatH4(x*AH4_(A_INFP_H));} | ||
| 2092 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2093 | AH1 AZolNotH1(AH1 x){return AH1_(1.0)-x;} /* NOT on {0.0,1.0} halves: 1.0-x. */ | ||
| 2094 | AH2 AZolNotH2(AH2 x){return AH2_(1.0)-x;} | ||
| 2095 | AH3 AZolNotH3(AH3 x){return AH3_(1.0)-x;} | ||
| 2096 | AH4 AZolNotH4(AH4 x){return AH4_(1.0)-x;} | ||
| 2097 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2098 | AH1 AZolOrH1(AH1 x,AH1 y){return max(x,y);} /* OR on {0.0,1.0} halves: max() is 1.0 when either input is 1.0. */ | ||
| 2099 | AH2 AZolOrH2(AH2 x,AH2 y){return max(x,y);} | ||
| 2100 | AH3 AZolOrH3(AH3 x,AH3 y){return max(x,y);} | ||
| 2101 | AH4 AZolOrH4(AH4 x,AH4 y){return max(x,y);} | ||
| 2102 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2103 | AH1 AZolSelH1(AH1 x,AH1 y,AH1 z){AH1 r=(-x)*z+z;return x*y+r;} /* Select x?y:z: r=z-x*z is z when x==0 and 0 when x==1; adding x*y then contributes y only when x==1. */ | ||
| 2104 | AH2 AZolSelH2(AH2 x,AH2 y,AH2 z){AH2 r=(-x)*z+z;return x*y+r;} | ||
| 2105 | AH3 AZolSelH3(AH3 x,AH3 y,AH3 z){AH3 r=(-x)*z+z;return x*y+r;} | ||
| 2106 | AH4 AZolSelH4(AH4 x,AH4 y,AH4 z){AH4 r=(-x)*z+z;return x*y+r;} | ||
| 2107 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2108 | AH1 AZolSignedH1(AH1 x){return ASatH1(x*AH1_(A_INFN_H));} /* x<0.0 test: scales by A_INFN_H then saturates, so negative x yields 1 and everything else 0 (see the IEEE notes in this file). */ | ||
| 2109 | AH2 AZolSignedH2(AH2 x){return ASatH2(x*AH2_(A_INFN_H));} | ||
| 2110 | AH3 AZolSignedH3(AH3 x){return ASatH3(x*AH3_(A_INFN_H));} | ||
| 2111 | AH4 AZolSignedH4(AH4 x){return ASatH4(x*AH4_(A_INFN_H));} | ||
| 2112 | #endif | ||
| 2113 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2114 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2115 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 2116 | //============================================================================================================================== | ||
| 2117 | // COLOR CONVERSIONS | ||
| 2118 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2119 | // These are all linear to/from some other space (where 'linear' has been shortened out of the function name). | ||
| 2120 | // So 'ToGamma' is 'LinearToGamma', and 'FromGamma' is 'LinearFromGamma'. | ||
| 2121 | // These are branch free implementations. | ||
| 2122 | // The AToSrgbF1() function is useful for stores for compute shaders for GPUs without hardware linear->sRGB store conversion. | ||
| 2123 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2124 | // TRANSFER FUNCTIONS | ||
| 2125 | // ================== | ||
| 2126 | // 709 ..... Rec709 used for some HDTVs | ||
| 2127 | // Gamma ... Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native | ||
| 2128 | // Pq ...... PQ native for HDR10 | ||
| 2129 | // Srgb .... The sRGB output, typical of PC displays, useful for 10-bit output, or storing to 8-bit UNORM without SRGB type | ||
| 2130 | // Two ..... Gamma 2.0, fastest conversion (useful for intermediate pass approximations) | ||
| 2131 | // Three ... Gamma 3.0, less fast, but good for HDR. | ||
| 2132 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2133 | // KEEPING TO SPEC | ||
| 2134 | // =============== | ||
| 2135 | // Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. | ||
| 2136 | // (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). | ||
| 2137 | // (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). | ||
| 2138 | // Also there is a slight step in the transition regions. | ||
| 2139 | // Precision of the coefficients in the spec being the likely cause. | ||
| 2140 | // Main usage case of the sRGB code is to do the linear->sRGB conversion in a compute shader before store. | ||
| 2141 | // This is to work around lack of hardware (typically only ROP does the conversion for free). | ||
| 2142 | // To "correct" the linear segment, would be to introduce error, because hardware decode of sRGB->linear is fixed (and free). | ||
| 2143 | // So this header keeps with the spec. | ||
| 2144 | // For linear->sRGB transforms, the linear segment in some respects reduces error, because rounding in that region is linear. | ||
| 2145 | // Rounding in the curved region in hardware (and fast software code) introduces error due to rounding in non-linear. | ||
| 2146 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2147 | // FOR PQ | ||
| 2148 | // ====== | ||
| 2149 | // Both input and output is {0.0-1.0}, and where output 1.0 represents 10000.0 cd/m^2. | ||
| 2150 | // All constants are only specified to FP32 precision. | ||
| 2151 | // External PQ source reference, | ||
| 2152 | // - https://github.com/ampas/aces-dev/blob/master/transforms/ctl/utilities/ACESlib.Utilities_Color.a1.0.1.ctl | ||
| 2153 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2154 | // PACKED VERSIONS | ||
| 2155 | // =============== | ||
| 2156 | // These are the A*H2() functions. | ||
| 2157 | // There are no PQ functions, as FP16 did not seem to have enough precision for the conversion. | ||
| 2158 | // The remaining functions are "good enough" for 8-bit, and maybe 10-bit if not concerned about a few 1-bit errors. | ||
| 2159 | // Precision is lowest in the 709 conversion, higher in sRGB, higher still in Two and Gamma (when using 2.2 at least). | ||
| 2160 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2161 | // NOTES | ||
| 2162 | // ===== | ||
| 2163 | // Could be faster for PQ conversions to be in ALU or a texture lookup depending on usage case. | ||
| 2164 | //============================================================================================================================== | ||
| 2165 | #if 1 | ||
| 2166 | AF1 ATo709F1(AF1 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);AF1 l=c*j.y;AF1 p=pow(c,j.z)*k.x+k.y; /* Linear->Rec.709: j.x=encoded knee, l=linear segment, p=power segment. */ | ||
| 2167 | return max(min(j.x,l),min(max(j.x,l),p));} /* Explicit median of {knee,l,p}: below the knee l>p, so clamp(knee,l,p) would be called with min>max, which is not a portable median. */ | ||
| 2168 | AF2 ATo709F2(AF2 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);AF2 l=c*j.yy;AF2 p=pow(c,j.zz)*k.xx+k.yy; | ||
| 2169 | return max(min(j.xx,l),min(max(j.xx,l),p));} | ||
| 2170 | AF3 ATo709F3(AF3 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);AF3 l=c*j.yyy;AF3 p=pow(c,j.zzz)*k.xxx+k.yyy; | ||
| 2171 | return max(min(j.xxx,l),min(max(j.xxx,l),p));} | ||
| 2172 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2173 | // Note 'rcpX' is '1/x', where the 'x' is what would be used in AFromGamma(). Returns pow(c,rcpX), with the scalar 'rcpX' splatted to the width of 'c'. | ||
| 2174 | AF1 AToGammaF1(AF1 c,AF1 rcpX){return pow(c,AF1_(rcpX));} | ||
| 2175 | AF2 AToGammaF2(AF2 c,AF1 rcpX){return pow(c,AF2_(rcpX));} | ||
| 2176 | AF3 AToGammaF3(AF3 c,AF1 rcpX){return pow(c,AF3_(rcpX));} | ||
| 2177 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2178 | AF1 AToPqF1(AF1 x){AF1 p=pow(x,AF1_(0.159302)); /* Linear->PQ: p=x^0.159302, then ((0.835938+18.8516*p)/(1+18.6875*p))^78.8438. Input and output are {0.0-1.0} per the 'FOR PQ' notes. */ | ||
| 2179 | return pow((AF1_(0.835938)+AF1_(18.8516)*p)/(AF1_(1.0)+AF1_(18.6875)*p),AF1_(78.8438));} | ||
| 2180 | AF2 AToPqF2(AF2 x){AF2 p=pow(x,AF2_(0.159302)); | ||
| 2181 | return pow((AF2_(0.835938)+AF2_(18.8516)*p)/(AF2_(1.0)+AF2_(18.6875)*p),AF2_(78.8438));} AF2 AToPqF1(AF2 x){return AToPqF2(x);} /* Original 'F1'-suffixed 2-wide overload, kept for existing callers. */ | ||
| 2182 | AF3 AToPqF3(AF3 x){AF3 p=pow(x,AF3_(0.159302)); | ||
| 2183 | return pow((AF3_(0.835938)+AF3_(18.8516)*p)/(AF3_(1.0)+AF3_(18.6875)*p),AF3_(78.8438));} AF3 AToPqF1(AF3 x){return AToPqF3(x);} /* Original 'F1'-suffixed 3-wide overload, kept for existing callers. */ | ||
| 2184 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2185 | AF1 AToSrgbF1(AF1 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055);AF1 l=c*j.y;AF1 p=pow(c,j.z)*k.x+k.y; /* Linear->sRGB: j.x=encoded knee, l=linear segment, p=power segment. */ | ||
| 2186 | return max(min(j.x,l),min(max(j.x,l),p));} /* Explicit median of {knee,l,p}: below the knee l>p, so clamp(knee,l,p) would be called with min>max, which is not a portable median. */ | ||
| 2187 | AF2 AToSrgbF2(AF2 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055);AF2 l=c*j.yy;AF2 p=pow(c,j.zz)*k.xx+k.yy; | ||
| 2188 | return max(min(j.xx,l),min(max(j.xx,l),p));} | ||
| 2189 | AF3 AToSrgbF3(AF3 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055);AF3 l=c*j.yyy;AF3 p=pow(c,j.zzz)*k.xxx+k.yyy; | ||
| 2190 | return max(min(j.xxx,l),min(max(j.xxx,l),p));} | ||
| 2191 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2192 | AF1 AToTwoF1(AF1 c){return sqrt(c);} /* Linear->gamma 2.0: square root of 'c'. */ | ||
| 2193 | AF2 AToTwoF2(AF2 c){return sqrt(c);} | ||
| 2194 | AF3 AToTwoF3(AF3 c){return sqrt(c);} | ||
| 2195 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2196 | AF1 AToThreeF1(AF1 c){return pow(c,AF1_(1.0/3.0));} /* Linear->gamma 3.0: 'c' raised to 1/3. */ | ||
| 2197 | AF2 AToThreeF2(AF2 c){return pow(c,AF2_(1.0/3.0));} | ||
| 2198 | AF3 AToThreeF3(AF3 c){return pow(c,AF3_(1.0/3.0));} | ||
| 2199 | #endif | ||
| 2200 | //============================================================================================================================== | ||
| 2201 | #if 1 | ||
| 2202 | // Unfortunately median won't work here. Rec.709->linear: 'c' is the encoded value, so the knee 'j.x' is the encoded-domain 0.081 (=0.018*4.5). | ||
| 2203 | AF1 AFrom709F1(AF1 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); /* Below the knee: c/4.5. Otherwise: ((c+0.099)/1.099)^(1/0.45). */ | ||
| 2204 | return AZolSelF1(AZolSignedF1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} | ||
| 2205 | AF2 AFrom709F2(AF2 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); | ||
| 2206 | return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} | ||
| 2207 | AF3 AFrom709F3(AF3 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); | ||
| 2208 | return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} | ||
| 2209 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2210 | AF1 AFromGammaF1(AF1 c,AF1 x){return pow(c,AF1_(x));} /* Gamma->linear: pow(c,x), with the scalar exponent 'x' splatted to the width of 'c'. */ | ||
| 2211 | AF2 AFromGammaF2(AF2 c,AF1 x){return pow(c,AF2_(x));} | ||
| 2212 | AF3 AFromGammaF3(AF3 c,AF1 x){return pow(c,AF3_(x));} | ||
| 2213 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2214 | AF1 AFromPqF1(AF1 x){AF1 p=pow(x,AF1_(0.0126833)); /* PQ->linear: p=x^0.0126833, then (sat(p-0.835938)/(18.8516-18.6875*p))^6.27739. Input and output are {0.0-1.0} per the 'FOR PQ' notes. */ | ||
| 2215 | return pow(ASatF1(p-AF1_(0.835938))/(AF1_(18.8516)-AF1_(18.6875)*p),AF1_(6.27739));} | ||
| 2216 | AF2 AFromPqF2(AF2 x){AF2 p=pow(x,AF2_(0.0126833)); | ||
| 2217 | return pow(ASatF2(p-AF2_(0.835938))/(AF2_(18.8516)-AF2_(18.6875)*p),AF2_(6.27739));} AF2 AFromPqF1(AF2 x){return AFromPqF2(x);} /* Original 'F1'-suffixed 2-wide overload, kept for existing callers. */ | ||
| 2218 | AF3 AFromPqF3(AF3 x){AF3 p=pow(x,AF3_(0.0126833)); | ||
| 2219 | return pow(ASatF3(p-AF3_(0.835938))/(AF3_(18.8516)-AF3_(18.6875)*p),AF3_(6.27739));} AF3 AFromPqF1(AF3 x){return AFromPqF3(x);} /* Original 'F1'-suffixed 3-wide overload, kept for existing callers. */ | ||
| 2220 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2221 | // Unfortunately median won't work here. sRGB->linear: 'c' is the encoded value, so the knee 'j.x' is the encoded-domain 0.04045 (=0.0031308*12.92). | ||
| 2222 | AF1 AFromSrgbF1(AF1 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); /* Below the knee: c/12.92. Otherwise: ((c+0.055)/1.055)^2.4. */ | ||
| 2223 | return AZolSelF1(AZolSignedF1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} | ||
| 2224 | AF2 AFromSrgbF2(AF2 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); | ||
| 2225 | return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} | ||
| 2226 | AF3 AFromSrgbF3(AF3 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); | ||
| 2227 | return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} | ||
| 2228 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2229 | AF1 AFromTwoF1(AF1 c){return c*c;} /* Gamma 2.0->linear: square of 'c'. */ | ||
| 2230 | AF2 AFromTwoF2(AF2 c){return c*c;} | ||
| 2231 | AF3 AFromTwoF3(AF3 c){return c*c;} | ||
| 2232 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2233 | AF1 AFromThreeF1(AF1 c){return c*c*c;} /* Gamma 3.0->linear: cube of 'c'. */ | ||
| 2234 | AF2 AFromThreeF2(AF2 c){return c*c*c;} | ||
| 2235 | AF3 AFromThreeF3(AF3 c){return c*c*c;} | ||
| 2236 | #endif | ||
| 2237 | //============================================================================================================================== | ||
| 2238 | #ifdef A_HALF | ||
| 2239 | AH1 ATo709H1(AH1 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099);AH1 l=c*j.y;AH1 p=pow(c,j.z)*k.x+k.y; /* Linear->Rec.709 (half): j.x=encoded knee, l=linear segment, p=power segment. */ | ||
| 2240 | return max(min(j.x,l),min(max(j.x,l),p));} /* Explicit median of {knee,l,p}: below the knee l>p, so clamp(knee,l,p) would be called with min>max, which is not a portable median. */ | ||
| 2241 | AH2 ATo709H2(AH2 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099);AH2 l=c*j.yy;AH2 p=pow(c,j.zz)*k.xx+k.yy; | ||
| 2242 | return max(min(j.xx,l),min(max(j.xx,l),p));} | ||
| 2243 | AH3 ATo709H3(AH3 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099);AH3 l=c*j.yyy;AH3 p=pow(c,j.zzz)*k.xxx+k.yyy; | ||
| 2244 | return max(min(j.xxx,l),min(max(j.xxx,l),p));} | ||
| 2245 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2246 | AH1 AToGammaH1(AH1 c,AH1 rcpX){return pow(c,AH1_(rcpX));} /* Linear->gamma (half): pow(c,rcpX), with the scalar 'rcpX' splatted to the width of 'c'. */ | ||
| 2247 | AH2 AToGammaH2(AH2 c,AH1 rcpX){return pow(c,AH2_(rcpX));} | ||
| 2248 | AH3 AToGammaH3(AH3 c,AH1 rcpX){return pow(c,AH3_(rcpX));} | ||
| 2249 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2250 | AH1 AToSrgbH1(AH1 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055);AH1 l=c*j.y;AH1 p=pow(c,j.z)*k.x+k.y; /* Linear->sRGB (half): j.x=encoded knee, l=linear segment, p=power segment. */ | ||
| 2251 | return max(min(j.x,l),min(max(j.x,l),p));} /* Explicit median of {knee,l,p}: below the knee l>p, so clamp(knee,l,p) would be called with min>max, which is not a portable median. */ | ||
| 2252 | AH2 AToSrgbH2(AH2 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055);AH2 l=c*j.yy;AH2 p=pow(c,j.zz)*k.xx+k.yy; | ||
| 2253 | return max(min(j.xx,l),min(max(j.xx,l),p));} | ||
| 2254 | AH3 AToSrgbH3(AH3 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055);AH3 l=c*j.yyy;AH3 p=pow(c,j.zzz)*k.xxx+k.yyy; | ||
| 2255 | return max(min(j.xxx,l),min(max(j.xxx,l),p));} | ||
| 2256 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2257 | AH1 AToTwoH1(AH1 c){return sqrt(c);} /* Linear->gamma 2.0 (half): square root of 'c'. */ | ||
| 2258 | AH2 AToTwoH2(AH2 c){return sqrt(c);} | ||
| 2259 | AH3 AToTwoH3(AH3 c){return sqrt(c);} | ||
| 2260 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2261 | AH1 AToThreeH1(AH1 c){return pow(c,AH1_(1.0/3.0));} AH1 AToThreeF1(AH1 c){return AToThreeH1(c);} /* Linear->gamma 3.0 (half): 'c' raised to 1/3. The 'F'-suffixed half overloads are the original misnamed entry points, kept for existing callers. */ | ||
| 2262 | AH2 AToThreeH2(AH2 c){return pow(c,AH2_(1.0/3.0));} AH2 AToThreeF2(AH2 c){return AToThreeH2(c);} | ||
| 2263 | AH3 AToThreeH3(AH3 c){return pow(c,AH3_(1.0/3.0));} AH3 AToThreeF3(AH3 c){return AToThreeH3(c);} | ||
| 2264 | #endif | ||
| 2265 | //============================================================================================================================== | ||
| 2266 | #ifdef A_HALF | ||
| 2267 | AH1 AFrom709H1(AH1 c){AH3 j=AH3(0.081,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); /* Rec.709->linear (half): 'c' is the encoded value, so the knee j.x is the encoded-domain 0.081. Below it: c/4.5. Otherwise: ((c+0.099)/1.099)^(1/0.45). */ | ||
| 2268 | return AZolSelH1(AZolSignedH1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} | ||
| 2269 | AH2 AFrom709H2(AH2 c){AH3 j=AH3(0.081,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); | ||
| 2270 | return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} | ||
| 2271 | AH3 AFrom709H3(AH3 c){AH3 j=AH3(0.081,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); | ||
| 2272 | return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} | ||
| 2273 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2274 | AH1 AFromGammaH1(AH1 c,AH1 x){return pow(c,AH1_(x));} /* Gamma->linear (half): pow(c,x), with the scalar exponent 'x' splatted to the width of 'c'. */ | ||
| 2275 | AH2 AFromGammaH2(AH2 c,AH1 x){return pow(c,AH2_(x));} | ||
| 2276 | AH3 AFromGammaH3(AH3 c,AH1 x){return pow(c,AH3_(x));} | ||
| 2277 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2278 | AH1 AFromSrgbH1(AH1 c){AH3 j=AH3(0.04045,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); /* sRGB->linear (half): 'c' is the encoded value, so the knee j.x is the encoded-domain 0.04045. Below it: c/12.92. Otherwise: ((c+0.055)/1.055)^2.4. */ | ||
| 2279 | return AZolSelH1(AZolSignedH1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} AH1 AHromSrgbF1(AH1 c){return AFromSrgbH1(c);} /* 'AHromSrgbF*' are the original misspelled entry points, kept for existing callers. */ | ||
| 2280 | AH2 AFromSrgbH2(AH2 c){AH3 j=AH3(0.04045,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); | ||
| 2281 | return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} AH2 AHromSrgbF2(AH2 c){return AFromSrgbH2(c);} | ||
| 2282 | AH3 AFromSrgbH3(AH3 c){AH3 j=AH3(0.04045,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); | ||
| 2283 | return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} AH3 AHromSrgbF3(AH3 c){return AFromSrgbH3(c);} | ||
| 2284 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2285 | AH1 AFromTwoH1(AH1 c){return c*c;} /* Gamma 2.0->linear (half): square of 'c'. */ | ||
| 2286 | AH2 AFromTwoH2(AH2 c){return c*c;} | ||
| 2287 | AH3 AFromTwoH3(AH3 c){return c*c;} | ||
| 2288 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2289 | AH1 AFromThreeH1(AH1 c){return c*c*c;} /* Gamma 3.0->linear (half): cube of 'c'. */ | ||
| 2290 | AH2 AFromThreeH2(AH2 c){return c*c*c;} | ||
| 2291 | AH3 AFromThreeH3(AH3 c){return c*c*c;} | ||
| 2292 | #endif | ||
| 2293 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2294 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2295 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 2296 | //============================================================================================================================== | ||
| 2297 | // CS REMAP | ||
| 2298 | //============================================================================================================================== | ||
| 2299 | // Simple remap 64x1 to 8x8 with rotated 2x2 pixel quads in quad linear. | ||
| 2300 | // 543210 | ||
| 2301 | // ====== | ||
| 2302 | // ..xxx. | ||
| 2303 | // yy...y | ||
| 2304 | AU2 ARmp8x8(AU1 a){return AU2(ABfe(a,1u,3u),ABfiM(ABfe(a,3u,3u),a,1u));} /* Maps a 64x1 index 'a' to an 8x8 (x,y) pair; per the bit diagram above, x comes from bits 3..1 and y from bits 5..4 plus bit 0. ABfe/ABfiM are defined elsewhere (presumably bitfield extract / masked insert). */ | ||
| 2305 | //============================================================================================================================== | ||
| 2306 | // More complex remap 64x1 to 8x8 which is necessary for 2D wave reductions. | ||
| 2307 | // 543210 | ||
| 2308 | // ====== | ||
| 2309 | // .xx..x | ||
| 2310 | // y..yy. | ||
| 2311 | // Details, | ||
| 2312 | // LANE TO 8x8 MAPPING | ||
| 2313 | // =================== | ||
| 2314 | // 00 01 08 09 10 11 18 19 | ||
| 2315 | // 02 03 0a 0b 12 13 1a 1b | ||
| 2316 | // 04 05 0c 0d 14 15 1c 1d | ||
| 2317 | // 06 07 0e 0f 16 17 1e 1f | ||
| 2318 | // 20 21 28 29 30 31 38 39 | ||
| 2319 | // 22 23 2a 2b 32 33 3a 3b | ||
| 2320 | // 24 25 2c 2d 34 35 3c 3d | ||
| 2321 | // 26 27 2e 2f 36 37 3e 3f | ||
| 2322 | AU2 ARmpRed8x8(AU1 a){return AU2(ABfiM(ABfe(a,2u,3u),a,1u),ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u));} /* Maps a 64x1 index 'a' to the 8x8 (x,y) layout tabulated above; per the bit diagram, x comes from bits 4..3 plus bit 0 and y from bit 5 plus bits 2..1. ABfe/ABfiM are defined elsewhere. */ | ||
| 2323 | //============================================================================================================================== | ||
| 2324 | #ifdef A_HALF | ||
| 2325 | AW2 ARmp8x8H(AU1 a){return AW2(ABfe(a,1u,3u),ABfiM(ABfe(a,3u,3u),a,1u));} /* Same expression as ARmp8x8(), with the (x,y) result constructed as AW2 (A_HALF builds only). */ | ||
| 2326 | AW2 ARmpRed8x8H(AU1 a){return AW2(ABfiM(ABfe(a,2u,3u),a,1u),ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u));} /* Same expression as ARmpRed8x8(), with the (x,y) result constructed as AW2. */ | ||
| 2327 | #endif | ||
| 2328 | #endif | ||
| 2329 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2330 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2331 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2332 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2333 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 2334 | //============================================================================================================================== | ||
| 2335 | // | ||
| 2336 | // REFERENCE | ||
| 2337 | // | ||
| 2338 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2339 | // IEEE FLOAT RULES | ||
| 2340 | // ================ | ||
| 2341 | // - saturate(NaN)=0, saturate(-INF)=0, saturate(+INF)=1 | ||
| 2342 | // - {+/-}0 * {+/-}INF = NaN | ||
| 2343 | // - -INF + (+INF) = NaN | ||
| 2344 | // - {+/-}0 / {+/-}0 = NaN | ||
| 2345 | // - {+/-}INF / {+/-}INF = NaN | ||
| 2346 | // - a<(-0) := sqrt(a) = NaN (a=-0.0 won't NaN) | ||
| 2347 | // - 0 == -0 | ||
| 2348 | // - 4/0 = +INF | ||
| 2349 | // - 4/-0 = -INF | ||
| 2350 | // - 4+INF = +INF | ||
| 2351 | // - 4-INF = -INF | ||
| 2352 | // - 4*(+INF) = +INF | ||
| 2353 | // - 4*(-INF) = -INF | ||
| 2354 | // - -4*(+INF) = -INF | ||
| 2355 | // - sqrt(+INF) = +INF | ||
| 2356 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2357 | // FP16 ENCODING | ||
| 2358 | // ============= | ||
| 2359 | // fedcba9876543210 | ||
| 2360 | // ---------------- | ||
| 2361 | // ......mmmmmmmmmm 10-bit mantissa (encodes 11-bit 0.5 to 1.0 except for denormals) | ||
| 2362 | // .eeeee.......... 5-bit exponent | ||
| 2363 | // .00000.......... denormals | ||
| 2364 | // .00001.......... -14 exponent | ||
| 2365 | // .11110.......... 15 exponent | ||
| 2366 | // .111110000000000 infinity | ||
| 2367 | // .11111nnnnnnnnnn NaN with n!=0 | ||
| 2368 | // s............... sign | ||
| 2369 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2370 | // FP16/INT16 ALIASING DENORMAL | ||
| 2371 | // ============================ | ||
| 2372 | // 11-bit unsigned integers alias with half float denormal/normal values, | ||
| 2373 | // 1 = 2^(-24) = 1/16777216 ....................... first denormal value | ||
| 2374 | // 2 = 2^(-23) | ||
| 2375 | // ... | ||
| 2376 | // 1023 = 2^(-14)*(1-2^(-10)) = 2^(-14)*(1-1/1024) ... last denormal value | ||
| 2377 | // 1024 = 2^(-14) = 1/16384 .......................... first normal value that still maps to integers | ||
| 2378 | // 2047 .............................................. last normal value that still maps to integers | ||
| 2379 | // Scaling limits, | ||
| 2380 | // 2^15 = 32768 ...................................... largest power of 2 scaling | ||
| 2381 | // Largest pow2 conversion mapping is at *32768, | ||
| 2382 | // 1 : 2^(-9) = 1/512 | ||
| 2383 | // 2 : 1/256 | ||
| 2384 | // 4 : 1/128 | ||
| 2385 | // 8 : 1/64 | ||
| 2386 | // 16 : 1/32 | ||
| 2387 | // 32 : 1/16 | ||
| 2388 | // 64 : 1/8 | ||
| 2389 | // 128 : 1/4 | ||
| 2390 | // 256 : 1/2 | ||
| 2391 | // 512 : 1 | ||
| 2392 | // 1024 : 2 | ||
| 2393 | // 2047 : a little less than 4 | ||
| 2394 | //============================================================================================================================== | ||
| 2395 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2396 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2397 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2398 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2399 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2400 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2401 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 2402 | //============================================================================================================================== | ||
| 2403 | // | ||
| 2404 | // | ||
| 2405 | // GPU/CPU PORTABILITY | ||
| 2406 | // | ||
| 2407 | // | ||
| 2408 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2409 | // This is the GPU implementation; the section that follows is compiled only when A_GPU is defined. | ||
| 2410 | // See the CPU implementation for docs. Here A_TRUE/A_FALSE expand to the true/false literals and A_STATIC expands to nothing. | ||
| 2411 | //============================================================================================================================== | ||
| 2412 | #ifdef A_GPU | ||
| 2413 | #define A_TRUE true | ||
| 2414 | #define A_FALSE false | ||
| 2415 | #define A_STATIC | ||
| 2416 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2417 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2418 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 2419 | //============================================================================================================================== | ||
| 2420 | // VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY - on the GPU: ret* = bare type, in*/inout*/out* = qualifier + type, var*(x) = declaration of x, init*(...) = AD2(x,y)-style construction | ||
| 2421 | //============================================================================================================================== | ||
| 2422 | #define retAD2 AD2 | ||
| 2423 | #define retAD3 AD3 | ||
| 2424 | #define retAD4 AD4 | ||
| 2425 | #define retAF2 AF2 | ||
| 2426 | #define retAF3 AF3 | ||
| 2427 | #define retAF4 AF4 | ||
| 2428 | #define retAL2 AL2 | ||
| 2429 | #define retAL3 AL3 | ||
| 2430 | #define retAL4 AL4 | ||
| 2431 | #define retAU2 AU2 | ||
| 2432 | #define retAU3 AU3 | ||
| 2433 | #define retAU4 AU4 | ||
| 2434 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2435 | #define inAD2 in AD2 | ||
| 2436 | #define inAD3 in AD3 | ||
| 2437 | #define inAD4 in AD4 | ||
| 2438 | #define inAF2 in AF2 | ||
| 2439 | #define inAF3 in AF3 | ||
| 2440 | #define inAF4 in AF4 | ||
| 2441 | #define inAL2 in AL2 | ||
| 2442 | #define inAL3 in AL3 | ||
| 2443 | #define inAL4 in AL4 | ||
| 2444 | #define inAU2 in AU2 | ||
| 2445 | #define inAU3 in AU3 | ||
| 2446 | #define inAU4 in AU4 | ||
| 2447 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2448 | #define inoutAD2 inout AD2 | ||
| 2449 | #define inoutAD3 inout AD3 | ||
| 2450 | #define inoutAD4 inout AD4 | ||
| 2451 | #define inoutAF2 inout AF2 | ||
| 2452 | #define inoutAF3 inout AF3 | ||
| 2453 | #define inoutAF4 inout AF4 | ||
| 2454 | #define inoutAL2 inout AL2 | ||
| 2455 | #define inoutAL3 inout AL3 | ||
| 2456 | #define inoutAL4 inout AL4 | ||
| 2457 | #define inoutAU2 inout AU2 | ||
| 2458 | #define inoutAU3 inout AU3 | ||
| 2459 | #define inoutAU4 inout AU4 | ||
| 2460 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2461 | #define outAD2 out AD2 | ||
| 2462 | #define outAD3 out AD3 | ||
| 2463 | #define outAD4 out AD4 | ||
| 2464 | #define outAF2 out AF2 | ||
| 2465 | #define outAF3 out AF3 | ||
| 2466 | #define outAF4 out AF4 | ||
| 2467 | #define outAL2 out AL2 | ||
| 2468 | #define outAL3 out AL3 | ||
| 2469 | #define outAL4 out AL4 | ||
| 2470 | #define outAU2 out AU2 | ||
| 2471 | #define outAU3 out AU3 | ||
| 2472 | #define outAU4 out AU4 | ||
| 2473 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2474 | #define varAD2(x) AD2 x | ||
| 2475 | #define varAD3(x) AD3 x | ||
| 2476 | #define varAD4(x) AD4 x | ||
| 2477 | #define varAF2(x) AF2 x | ||
| 2478 | #define varAF3(x) AF3 x | ||
| 2479 | #define varAF4(x) AF4 x | ||
| 2480 | #define varAL2(x) AL2 x | ||
| 2481 | #define varAL3(x) AL3 x | ||
| 2482 | #define varAL4(x) AL4 x | ||
| 2483 | #define varAU2(x) AU2 x | ||
| 2484 | #define varAU3(x) AU3 x | ||
| 2485 | #define varAU4(x) AU4 x | ||
| 2486 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2487 | #define initAD2(x,y) AD2(x,y) | ||
| 2488 | #define initAD3(x,y,z) AD3(x,y,z) | ||
| 2489 | #define initAD4(x,y,z,w) AD4(x,y,z,w) | ||
| 2490 | #define initAF2(x,y) AF2(x,y) | ||
| 2491 | #define initAF3(x,y,z) AF3(x,y,z) | ||
| 2492 | #define initAF4(x,y,z,w) AF4(x,y,z,w) | ||
| 2493 | #define initAL2(x,y) AL2(x,y) | ||
| 2494 | #define initAL3(x,y,z) AL3(x,y,z) | ||
| 2495 | #define initAL4(x,y,z,w) AL4(x,y,z,w) | ||
| 2496 | #define initAU2(x,y) AU2(x,y) | ||
| 2497 | #define initAU3(x,y,z) AU3(x,y,z) | ||
| 2498 | #define initAU4(x,y,z,w) AU4(x,y,z,w) | ||
| 2499 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2500 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2501 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 2502 | //============================================================================================================================== | ||
| 2503 | // SCALAR RETURN OPS - on the GPU these are macros over abs/cos/dot/exp2/floor/log2/max/min/sin/sqrt; all but AMax*/AMin* cast their arguments first | ||
| 2504 | //============================================================================================================================== | ||
| 2505 | #define AAbsD1(a) abs(AD1(a)) | ||
| 2506 | #define AAbsF1(a) abs(AF1(a)) | ||
| 2507 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2508 | #define ACosD1(a) cos(AD1(a)) | ||
| 2509 | #define ACosF1(a) cos(AF1(a)) | ||
| 2510 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2511 | #define ADotD2(a,b) dot(AD2(a),AD2(b)) | ||
| 2512 | #define ADotD3(a,b) dot(AD3(a),AD3(b)) | ||
| 2513 | #define ADotD4(a,b) dot(AD4(a),AD4(b)) | ||
| 2514 | #define ADotF2(a,b) dot(AF2(a),AF2(b)) | ||
| 2515 | #define ADotF3(a,b) dot(AF3(a),AF3(b)) | ||
| 2516 | #define ADotF4(a,b) dot(AF4(a),AF4(b)) | ||
| 2517 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2518 | #define AExp2D1(a) exp2(AD1(a)) | ||
| 2519 | #define AExp2F1(a) exp2(AF1(a)) | ||
| 2520 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2521 | #define AFloorD1(a) floor(AD1(a)) | ||
| 2522 | #define AFloorF1(a) floor(AF1(a)) | ||
| 2523 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2524 | #define ALog2D1(a) log2(AD1(a)) | ||
| 2525 | #define ALog2F1(a) log2(AF1(a)) | ||
| 2526 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2527 | #define AMaxD1(a,b) max(a,b) | ||
| 2528 | #define AMaxF1(a,b) max(a,b) | ||
| 2529 | #define AMaxL1(a,b) max(a,b) | ||
| 2530 | #define AMaxU1(a,b) max(a,b) | ||
| 2531 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2532 | #define AMinD1(a,b) min(a,b) | ||
| 2533 | #define AMinF1(a,b) min(a,b) | ||
| 2534 | #define AMinL1(a,b) min(a,b) | ||
| 2535 | #define AMinU1(a,b) min(a,b) | ||
| 2536 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2537 | #define ASinD1(a) sin(AD1(a)) | ||
| 2538 | #define ASinF1(a) sin(AF1(a)) | ||
| 2539 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2540 | #define ASqrtD1(a) sqrt(AD1(a)) | ||
| 2541 | #define ASqrtF1(a) sqrt(AF1(a)) | ||
| 2542 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2543 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2544 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 2545 | //============================================================================================================================== | ||
| 2546 | // SCALAR RETURN OPS - DEPENDENT (pow wrappers; NOTE(review): APowD1 casts its exponent with AF1 while its base uses AD1 - confirm this is intended) | ||
| 2547 | //============================================================================================================================== | ||
| 2548 | #define APowD1(a,b) pow(AD1(a),AF1(b)) | ||
| 2549 | #define APowF1(a,b) pow(AF1(a),AF1(b)) | ||
| 2550 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2551 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 2552 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 2553 | //============================================================================================================================== | ||
| 2554 | // VECTOR OPS (the *D2/*D3/*D4 double variants are compiled only when A_DUBL is defined; the *F2/*F3/*F4 float variants are not guarded by A_DUBL) | ||
| 2555 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2556 | // These are added as needed for production or prototyping, so not necessarily a complete set. | ||
| 2557 | // Each op writes its result to the destination 'd' (first argument) and also returns that same value; the *One* forms take a scalar that is widened with AD*_()/AF*_(). | ||
| 2558 | //============================================================================================================================== | ||
| 2559 | #ifdef A_DUBL | ||
| 2560 | AD2 opAAbsD2(outAD2 d,inAD2 a){d=abs(a);return d;} | ||
| 2561 | AD3 opAAbsD3(outAD3 d,inAD3 a){d=abs(a);return d;} | ||
| 2562 | AD4 opAAbsD4(outAD4 d,inAD4 a){d=abs(a);return d;} | ||
| 2563 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2564 | AD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){d=a+b;return d;} | ||
| 2565 | AD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){d=a+b;return d;} | ||
| 2566 | AD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){d=a+b;return d;} | ||
| 2567 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2568 | AD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){d=a+AD2_(b);return d;} | ||
| 2569 | AD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){d=a+AD3_(b);return d;} | ||
| 2570 | AD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){d=a+AD4_(b);return d;} | ||
| 2571 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2572 | AD2 opACpyD2(outAD2 d,inAD2 a){d=a;return d;} | ||
| 2573 | AD3 opACpyD3(outAD3 d,inAD3 a){d=a;return d;} | ||
| 2574 | AD4 opACpyD4(outAD4 d,inAD4 a){d=a;return d;} | ||
| 2575 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2576 | AD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){d=ALerpD2(a,b,c);return d;} | ||
| 2577 | AD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){d=ALerpD3(a,b,c);return d;} | ||
| 2578 | AD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){d=ALerpD4(a,b,c);return d;} | ||
| 2579 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2580 | AD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){d=ALerpD2(a,b,AD2_(c));return d;} | ||
| 2581 | AD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){d=ALerpD3(a,b,AD3_(c));return d;} | ||
| 2582 | AD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){d=ALerpD4(a,b,AD4_(c));return d;} | ||
| 2583 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2584 | AD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){d=max(a,b);return d;} | ||
| 2585 | AD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){d=max(a,b);return d;} | ||
| 2586 | AD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){d=max(a,b);return d;} | ||
| 2587 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2588 | AD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){d=min(a,b);return d;} | ||
| 2589 | AD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){d=min(a,b);return d;} | ||
| 2590 | AD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){d=min(a,b);return d;} | ||
| 2591 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2592 | AD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){d=a*b;return d;} | ||
| 2593 | AD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){d=a*b;return d;} | ||
| 2594 | AD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){d=a*b;return d;} | ||
| 2595 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2596 | AD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){d=a*AD2_(b);return d;} | ||
| 2597 | AD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){d=a*AD3_(b);return d;} | ||
| 2598 | AD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){d=a*AD4_(b);return d;} | ||
| 2599 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2600 | AD2 opANegD2(outAD2 d,inAD2 a){d=-a;return d;} | ||
| 2601 | AD3 opANegD3(outAD3 d,inAD3 a){d=-a;return d;} | ||
| 2602 | AD4 opANegD4(outAD4 d,inAD4 a){d=-a;return d;} | ||
| 2603 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2604 | AD2 opARcpD2(outAD2 d,inAD2 a){d=ARcpD2(a);return d;} | ||
| 2605 | AD3 opARcpD3(outAD3 d,inAD3 a){d=ARcpD3(a);return d;} | ||
| 2606 | AD4 opARcpD4(outAD4 d,inAD4 a){d=ARcpD4(a);return d;} | ||
| 2607 | #endif | ||
| 2608 | //============================================================================================================================== | ||
| 2609 | AF2 opAAbsF2(outAF2 d,inAF2 a){d=abs(a);return d;} | ||
| 2610 | AF3 opAAbsF3(outAF3 d,inAF3 a){d=abs(a);return d;} | ||
| 2611 | AF4 opAAbsF4(outAF4 d,inAF4 a){d=abs(a);return d;} | ||
| 2612 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2613 | AF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){d=a+b;return d;} | ||
| 2614 | AF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){d=a+b;return d;} | ||
| 2615 | AF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){d=a+b;return d;} | ||
| 2616 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2617 | AF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){d=a+AF2_(b);return d;} | ||
| 2618 | AF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){d=a+AF3_(b);return d;} | ||
| 2619 | AF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){d=a+AF4_(b);return d;} | ||
| 2620 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2621 | AF2 opACpyF2(outAF2 d,inAF2 a){d=a;return d;} | ||
| 2622 | AF3 opACpyF3(outAF3 d,inAF3 a){d=a;return d;} | ||
| 2623 | AF4 opACpyF4(outAF4 d,inAF4 a){d=a;return d;} | ||
| 2624 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2625 | AF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){d=ALerpF2(a,b,c);return d;} | ||
| 2626 | AF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){d=ALerpF3(a,b,c);return d;} | ||
| 2627 | AF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){d=ALerpF4(a,b,c);return d;} | ||
| 2628 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2629 | AF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){d=ALerpF2(a,b,AF2_(c));return d;} | ||
| 2630 | AF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){d=ALerpF3(a,b,AF3_(c));return d;} | ||
| 2631 | AF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){d=ALerpF4(a,b,AF4_(c));return d;} | ||
| 2632 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2633 | AF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){d=max(a,b);return d;} | ||
| 2634 | AF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){d=max(a,b);return d;} | ||
| 2635 | AF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){d=max(a,b);return d;} | ||
| 2636 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2637 | AF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){d=min(a,b);return d;} | ||
| 2638 | AF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){d=min(a,b);return d;} | ||
| 2639 | AF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){d=min(a,b);return d;} | ||
| 2640 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2641 | AF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){d=a*b;return d;} | ||
| 2642 | AF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){d=a*b;return d;} | ||
| 2643 | AF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){d=a*b;return d;} | ||
| 2644 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2645 | AF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){d=a*AF2_(b);return d;} | ||
| 2646 | AF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){d=a*AF3_(b);return d;} | ||
| 2647 | AF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){d=a*AF4_(b);return d;} | ||
| 2648 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2649 | AF2 opANegF2(outAF2 d,inAF2 a){d=-a;return d;} | ||
| 2650 | AF3 opANegF3(outAF3 d,inAF3 a){d=-a;return d;} | ||
| 2651 | AF4 opANegF4(outAF4 d,inAF4 a){d=-a;return d;} | ||
| 2652 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 2653 | AF2 opARcpF2(outAF2 d,inAF2 a){d=ARcpF2(a);return d;} | ||
| 2654 | AF3 opARcpF3(outAF3 d,inAF3 a){d=ARcpF3(a);return d;} | ||
| 2655 | AF4 opARcpF4(outAF4 d,inAF4 a){d=ARcpF4(a);return d;} | ||
| 2656 | #endif | ||
diff --git a/externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h b/externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h new file mode 100644 index 000000000..15ecfde5c --- /dev/null +++ b/externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h | |||
| @@ -0,0 +1,1199 @@ | |||
| 1 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 2 | //============================================================================================================================== | ||
| 3 | // | ||
| 4 | // | ||
| 5 | // AMD FidelityFX SUPER RESOLUTION [FSR 1] ::: SPATIAL SCALING & EXTRAS - v1.20210629 | ||
| 6 | // | ||
| 7 | // | ||
| 8 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 9 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 10 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 11 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 12 | // FidelityFX Super Resolution Sample | ||
| 13 | // | ||
| 14 | // Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. | ||
| 15 | // Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| 16 | // of this software and associated documentation files(the "Software"), to deal | ||
| 17 | // in the Software without restriction, including without limitation the rights | ||
| 18 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell | ||
| 19 | // copies of the Software, and to permit persons to whom the Software is | ||
| 20 | // furnished to do so, subject to the following conditions : | ||
| 21 | // The above copyright notice and this permission notice shall be included in | ||
| 22 | // all copies or substantial portions of the Software. | ||
| 23 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 24 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 25 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE | ||
| 26 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 27 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 28 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| 29 | // THE SOFTWARE. | ||
| 30 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 31 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 32 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 33 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 34 | // ABOUT | ||
| 35 | // ===== | ||
| 36 | // FSR is a collection of algorithms relating to generating a higher resolution image. | ||
| 37 | // This specific header focuses on single-image non-temporal image scaling, and related tools. | ||
| 38 | // | ||
| 39 | // The core functions are EASU and RCAS: | ||
| 40 | // [EASU] Edge Adaptive Spatial Upsampling ....... 1x to 4x area range spatial scaling, clamped adaptive elliptical filter. | ||
| 41 | // [RCAS] Robust Contrast Adaptive Sharpening .... A non-scaling variation on CAS. | ||
| 42 | // RCAS needs to be applied after EASU as a separate pass. | ||
| 43 | // | ||
| 44 | // Optional utility functions are: | ||
| 45 | // [LFGA] Linear Film Grain Applicator ........... Tool to apply film grain after scaling. | ||
| 46 | // [SRTM] Simple Reversible Tone-Mapper .......... Linear HDR {0 to FP16_MAX} to {0 to 1} and back. | ||
| 47 | // [TEPD] Temporal Energy Preserving Dither ...... Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. | ||
| 48 | // See each individual sub-section for inline documentation. | ||
| 49 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 50 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 51 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 52 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 53 | // FUNCTION PERMUTATIONS | ||
| 54 | // ===================== | ||
| 55 | // *F() ..... Single item computation with 32-bit. | ||
| 56 | // *H() ..... Single item computation with 16-bit, with packing (aka two 16-bit ops in parallel) when possible. | ||
| 57 | // *Hx2() ... Processing two items in parallel with 16-bit, easier packing. | ||
| 58 | // Not all interfaces in this file have a *Hx2() form. | ||
| 59 | //============================================================================================================================== | ||
| 60 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 61 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 62 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 63 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 64 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 65 | //============================================================================================================================== | ||
| 66 | // | ||
| 67 | // FSR - [EASU] EDGE ADAPTIVE SPATIAL UPSAMPLING | ||
| 68 | // | ||
| 69 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 70 | // EASU provides a high quality spatial-only scaling at relatively low cost. | ||
| 71 | // Meaning EASU is appropriate for laptops and other low-end GPUs. | ||
| 72 | // Quality from 1x to 4x area scaling is good. | ||
| 73 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 74 | // The scaler uses a modified fast approximation to the standard lanczos(size=2) kernel. | ||
| 75 | // EASU runs in a single pass, so it applies a directionally and anisotropically adaptive radial lanczos. | ||
| 76 | // This is also kept as simple as possible to have minimum runtime. | ||
| 77 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 78 | // The lanczos filter has negative lobes, so by itself it will introduce ringing. | ||
| 79 | // To remove all ringing, the algorithm uses the nearest 2x2 input texels as a neighborhood, | ||
| 80 | // and limits output to the minimum and maximum of that neighborhood. | ||
| 81 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 82 | // Input image requirements: | ||
| 83 | // | ||
| 84 | // Color needs to be encoded as 3 channels [red, green, blue] (e.g. XYZ is not supported) | ||
| 85 | // Each channel needs to be in the range [0, 1] | ||
| 86 | // Any color primaries are supported | ||
| 87 | // Display / tonemapping curve needs to be as if presenting to an sRGB display or similar (e.g. Gamma 2.0) | ||
| 88 | // There should be no banding in the input | ||
| 89 | // There should be no high amplitude noise in the input | ||
| 90 | // There should be no noise in the input that is not at input pixel granularity | ||
| 91 | // For performance purposes, use 32bpp formats | ||
| 92 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 93 | // Best to apply EASU at the end of the frame after tonemapping | ||
| 94 | // but before film grain or composite of the UI. | ||
| 95 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 96 | // Example of including this header for D3D HLSL : | ||
| 97 | // | ||
| 98 | // #define A_GPU 1 | ||
| 99 | // #define A_HLSL 1 | ||
| 100 | // #define A_HALF 1 | ||
| 101 | // #include "ffx_a.h" | ||
| 102 | // #define FSR_EASU_H 1 | ||
| 103 | // #define FSR_RCAS_H 1 | ||
| 104 | // //declare input callbacks | ||
| 105 | // #include "ffx_fsr1.h" | ||
| 106 | // | ||
| 107 | // Example of including this header for Vulkan GLSL : | ||
| 108 | // | ||
| 109 | // #define A_GPU 1 | ||
| 110 | // #define A_GLSL 1 | ||
| 111 | // #define A_HALF 1 | ||
| 112 | // #include "ffx_a.h" | ||
| 113 | // #define FSR_EASU_H 1 | ||
| 114 | // #define FSR_RCAS_H 1 | ||
| 115 | // //declare input callbacks | ||
| 116 | // #include "ffx_fsr1.h" | ||
| 117 | // | ||
| 118 | // Example of including this header for Vulkan HLSL : | ||
| 119 | // | ||
| 120 | // #define A_GPU 1 | ||
| 121 | // #define A_HLSL 1 | ||
| 122 | // #define A_HLSL_6_2 1 | ||
| 123 | // #define A_NO_16_BIT_CAST 1 | ||
| 124 | // #define A_HALF 1 | ||
| 125 | // #include "ffx_a.h" | ||
| 126 | // #define FSR_EASU_H 1 | ||
| 127 | // #define FSR_RCAS_H 1 | ||
| 128 | // //declare input callbacks | ||
| 129 | // #include "ffx_fsr1.h" | ||
| 130 | // | ||
| 131 | // Example of declaring the required input callbacks for GLSL : | ||
| 132 | // The callbacks need to gather4 for each color channel using the specified texture coordinate 'p'. | ||
| 133 | // EASU uses gather4 to reduce position computation logic and for free Arrays of Structures to Structures of Arrays conversion. | ||
| 134 | // | ||
| 135 | // AH4 FsrEasuRH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,0));} | ||
| 136 | // AH4 FsrEasuGH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,1));} | ||
| 137 | // AH4 FsrEasuBH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,2));} | ||
| 138 | // ... | ||
| 139 | // The FsrEasuCon function needs to be called from the CPU or GPU to set up constants. | ||
| 140 | // The difference in viewport and input image size is there to support Dynamic Resolution Scaling. | ||
| 141 | // To use FsrEasuCon() on the CPU, define A_CPU before including ffx_a and ffx_fsr1. | ||
| 142 | // Including a GPU example here, the 'con0' through 'con3' values would be stored out to a constant buffer. | ||
| 143 | // AU4 con0,con1,con2,con3; | ||
| 144 | // FsrEasuCon(con0,con1,con2,con3, | ||
| 145 | // 1920.0,1080.0, // Viewport size (top left aligned) in the input image which is to be scaled. | ||
| 146 | // 3840.0,2160.0, // The size of the input image. | ||
| 147 | // 2560.0,1440.0); // The output resolution. | ||
| 148 | //============================================================================================================================== | ||
| 149 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 150 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 151 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 152 | //============================================================================================================================== | ||
| 153 | // CONSTANT SETUP | ||
| 154 | //============================================================================================================================== | ||
| 155 | // Call to setup required constant values (works on CPU or GPU). Fills 'con0' through 'con3' (each value packed with AU1_AF1) for use by FsrEasuF()/FsrEasuH(). | ||
| 156 | A_STATIC void FsrEasuCon( | ||
| 157 | outAU4 con0, | ||
| 158 | outAU4 con1, | ||
| 159 | outAU4 con2, | ||
| 160 | outAU4 con3, | ||
| 161 | // This is the rendered image resolution being upscaled | ||
| 162 | AF1 inputViewportInPixelsX, | ||
| 163 | AF1 inputViewportInPixelsY, | ||
| 164 | // This is the resolution of the resource containing the input image (useful for dynamic resolution) | ||
| 165 | AF1 inputSizeInPixelsX, | ||
| 166 | AF1 inputSizeInPixelsY, | ||
| 167 | // This is the display resolution which the input image gets upscaled to | ||
| 168 | AF1 outputSizeInPixelsX, | ||
| 169 | AF1 outputSizeInPixelsY){ | ||
| 170 | // Output integer position to a pixel position in viewport (con0.xy is the scale, con0.zw is the offset). | ||
| 171 | con0[0]=AU1_AF1(inputViewportInPixelsX*ARcpF1(outputSizeInPixelsX)); | ||
| 172 | con0[1]=AU1_AF1(inputViewportInPixelsY*ARcpF1(outputSizeInPixelsY)); | ||
| 173 | con0[2]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsX*ARcpF1(outputSizeInPixelsX)-AF1_(0.5)); | ||
| 174 | con0[3]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsY*ARcpF1(outputSizeInPixelsY)-AF1_(0.5)); | ||
| 175 | // Viewport pixel position to normalized image space. | ||
| 176 | // This is used to get upper-left of 'F' tap. | ||
| 177 | con1[0]=AU1_AF1(ARcpF1(inputSizeInPixelsX)); | ||
| 178 | con1[1]=AU1_AF1(ARcpF1(inputSizeInPixelsY)); | ||
| 179 | // Centers of gather4, first offset from upper-left of 'F'. | ||
| 180 | // +---+---+ | ||
| 181 | // | | | | ||
| 182 | // +--(0)--+ | ||
| 183 | // | b | c | | ||
| 184 | // +---F---+---+---+ | ||
| 185 | // | e | f | g | h | | ||
| 186 | // +--(1)--+--(2)--+ | ||
| 187 | // | i | j | k | l | | ||
| 188 | // +---+---+---+---+ | ||
| 189 | // | n | o | | ||
| 190 | // +--(3)--+ | ||
| 191 | // | | | | ||
| 192 | // +---+---+ | ||
| 193 | con1[2]=AU1_AF1(AF1_( 1.0)*ARcpF1(inputSizeInPixelsX)); | ||
| 194 | con1[3]=AU1_AF1(AF1_(-1.0)*ARcpF1(inputSizeInPixelsY)); | ||
| 195 | // These are from (0) instead of 'F': offsets to gather centers (1), (2) and (3), in units of one input texel. | ||
| 196 | con2[0]=AU1_AF1(AF1_(-1.0)*ARcpF1(inputSizeInPixelsX)); | ||
| 197 | con2[1]=AU1_AF1(AF1_( 2.0)*ARcpF1(inputSizeInPixelsY)); | ||
| 198 | con2[2]=AU1_AF1(AF1_( 1.0)*ARcpF1(inputSizeInPixelsX)); | ||
| 199 | con2[3]=AU1_AF1(AF1_( 2.0)*ARcpF1(inputSizeInPixelsY)); | ||
| 200 | con3[0]=AU1_AF1(AF1_( 0.0)*ARcpF1(inputSizeInPixelsX)); | ||
| 201 | con3[1]=AU1_AF1(AF1_( 4.0)*ARcpF1(inputSizeInPixelsY)); | ||
| 202 | con3[2]=con3[3]=0;} | ||
| 203 | |||
| 204 | // Same as FsrEasuCon(), for when the input image sits at an offset into the input image resource: the offset is added to con0[2] and con0[3]. | ||
| 205 | A_STATIC void FsrEasuConOffset( | ||
| 206 | outAU4 con0, | ||
| 207 | outAU4 con1, | ||
| 208 | outAU4 con2, | ||
| 209 | outAU4 con3, | ||
| 210 | // This is the rendered image resolution being upscaled | ||
| 211 | AF1 inputViewportInPixelsX, | ||
| 212 | AF1 inputViewportInPixelsY, | ||
| 213 | // This is the resolution of the resource containing the input image (useful for dynamic resolution) | ||
| 214 | AF1 inputSizeInPixelsX, | ||
| 215 | AF1 inputSizeInPixelsY, | ||
| 216 | // This is the display resolution which the input image gets upscaled to | ||
| 217 | AF1 outputSizeInPixelsX, | ||
| 218 | AF1 outputSizeInPixelsY, | ||
| 219 | // This is the input image offset into the resource containing it (useful for dynamic resolution) | ||
| 220 | AF1 inputOffsetInPixelsX, | ||
| 221 | AF1 inputOffsetInPixelsY) { | ||
| 222 | FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); | ||
| 223 | con0[2] = AU1_AF1(AF1_(0.5) * inputViewportInPixelsX * ARcpF1(outputSizeInPixelsX) - AF1_(0.5) + inputOffsetInPixelsX); | ||
| 224 | con0[3] = AU1_AF1(AF1_(0.5) * inputViewportInPixelsY * ARcpF1(outputSizeInPixelsY) - AF1_(0.5) + inputOffsetInPixelsY); | ||
| 225 | } | ||
| 226 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 227 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 228 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 229 | //============================================================================================================================== | ||
| 230 | // NON-PACKED 32-BIT VERSION | ||
| 231 | //============================================================================================================================== | ||
| 232 | #if defined(A_GPU)&&defined(FSR_EASU_F) | ||
| 233 | // Input callback prototypes, need to be implemented by calling shader: one gather4 per color channel (R, G, B) at texture coordinate 'p'. | ||
| 234 | AF4 FsrEasuRF(AF2 p); | ||
| 235 | AF4 FsrEasuGF(AF2 p); | ||
| 236 | AF4 FsrEasuBF(AF2 p); | ||
| 237 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 238 | // Filtering for a given tap for the scalar (non-packed) path: adds the tap's weighted color to 'aC' and its weight to 'aW'. | ||
| 239 | void FsrEasuTapF( | ||
| 240 | inout AF3 aC, // Accumulated color, with negative lobe. | ||
| 241 | inout AF1 aW, // Accumulated weight. | ||
| 242 | AF2 off, // Pixel offset from resolve position to tap. | ||
| 243 | AF2 dir, // Gradient direction. | ||
| 244 | AF2 len, // Length. | ||
| 245 | AF1 lob, // Negative lobe strength. | ||
| 246 | AF1 clp, // Clipping point. | ||
| 247 | AF3 c){ // Tap color. | ||
| 248 | // Rotate offset by direction. | ||
| 249 | AF2 v; | ||
| 250 | v.x=(off.x*( dir.x))+(off.y*dir.y); | ||
| 251 | v.y=(off.x*(-dir.y))+(off.y*dir.x); | ||
| 252 | // Anisotropy. | ||
| 253 | v*=len; | ||
| 254 | // Compute distance^2. | ||
| 255 | AF1 d2=v.x*v.x+v.y*v.y; | ||
| 256 | // Limit to the window as at corner, 2 taps can easily be outside. | ||
| 257 | d2=min(d2,clp); | ||
| 258 | // Approximation of lanczos2 without sin() or rcp(), or sqrt() to get x. | ||
| 259 | // (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2 | ||
| 260 | // |_______________________________________| |_______________| | ||
| 261 | // base window | ||
| 262 | // The general form of the 'base' is, | ||
| 263 | // (a*(b*x^2-1)^2-(a-1)) | ||
| 264 | // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe. | ||
| 265 | AF1 wB=AF1_(2.0/5.0)*d2+AF1_(-1.0); | ||
| 266 | AF1 wA=lob*d2+AF1_(-1.0); | ||
| 267 | wB*=wB; | ||
| 268 | wA*=wA; | ||
| 269 | wB=AF1_(25.0/16.0)*wB+AF1_(-(25.0/16.0-1.0)); | ||
| 270 | AF1 w=wB*wA; | ||
| 271 | // Do weighted average. | ||
| 272 | aC+=c*w;aW+=w;} | ||
| 273 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 274 | // Accumulate direction and length for one bilinear corner (selected by biS/biT/biU/biV), scaled by that corner's bilinear weight. | ||
| 275 | void FsrEasuSetF( | ||
| 276 | inout AF2 dir, | ||
| 277 | inout AF1 len, | ||
| 278 | AF2 pp, | ||
| 279 | AP1 biS,AP1 biT,AP1 biU,AP1 biV, | ||
| 280 | AF1 lA,AF1 lB,AF1 lC,AF1 lD,AF1 lE){ | ||
| 281 | // Compute bilinear weight, branches factor out as predicates are compile-time immediates. | ||
| 282 | // s t | ||
| 283 | // u v | ||
| 284 | AF1 w = AF1_(0.0); | ||
| 285 | if(biS)w=(AF1_(1.0)-pp.x)*(AF1_(1.0)-pp.y); | ||
| 286 | if(biT)w= pp.x *(AF1_(1.0)-pp.y); | ||
| 287 | if(biU)w=(AF1_(1.0)-pp.x)* pp.y ; | ||
| 288 | if(biV)w= pp.x * pp.y ; | ||
| 289 | // Direction is the '+' diff. | ||
| 290 | // a | ||
| 291 | // b c d | ||
| 292 | // e | ||
| 293 | // Then takes magnitude from abs average of both sides of 'c'. | ||
| 294 | // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms. | ||
| 295 | AF1 dc=lD-lC; | ||
| 296 | AF1 cb=lC-lB; | ||
| 297 | AF1 lenX=max(abs(dc),abs(cb)); | ||
| 298 | lenX=APrxLoRcpF1(lenX); | ||
| 299 | AF1 dirX=lD-lB; | ||
| 300 | dir.x+=dirX*w; | ||
| 301 | lenX=ASatF1(abs(dirX)*lenX); | ||
| 302 | lenX*=lenX; | ||
| 303 | len+=lenX*w; | ||
| 304 | // Repeat for the y axis. | ||
| 305 | AF1 ec=lE-lC; | ||
| 306 | AF1 ca=lC-lA; | ||
| 307 | AF1 lenY=max(abs(ec),abs(ca)); | ||
| 308 | lenY=APrxLoRcpF1(lenY); | ||
| 309 | AF1 dirY=lE-lA; | ||
| 310 | dir.y+=dirY*w; | ||
| 311 | lenY=ASatF1(abs(dirY)*lenY); | ||
| 312 | lenY*=lenY; | ||
| 313 | len+=lenY*w;} | ||
| 314 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 315 | void FsrEasuF( | ||
| 316 | out AF3 pix, | ||
| 317 | AU2 ip, // Integer pixel position in output. | ||
| 318 | AU4 con0, // Constants generated by FsrEasuCon(). | ||
| 319 | AU4 con1, | ||
| 320 | AU4 con2, | ||
| 321 | AU4 con3){ | ||
| 322 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 323 | // Get position of 'f': 'fp' is the integer part, and 'pp' is left holding the fractional part. | ||
| 324 | AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw); | ||
| 325 | AF2 fp=floor(pp); | ||
| 326 | pp-=fp; | ||
| 327 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 328 | // 12-tap kernel. | ||
| 329 | // b c | ||
| 330 | // e f g h | ||
| 331 | // i j k l | ||
| 332 | // n o | ||
| 333 | // Gather 4 ordering. | ||
| 334 | // a b | ||
| 335 | // r g | ||
| 336 | // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions, | ||
| 337 | // a b <- unused (z) | ||
| 338 | // r g | ||
| 339 | // a b a b | ||
| 340 | // r g r g | ||
| 341 | // a b | ||
| 342 | // r g <- unused (z) | ||
| 343 | // Allowing dead-code removal to remove the 'z's. | ||
| 344 | AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw); | ||
| 345 | // These are from p0 to avoid pulling two constants on pre-Navi hardware. | ||
| 346 | AF2 p1=p0+AF2_AU2(con2.xy); | ||
| 347 | AF2 p2=p0+AF2_AU2(con2.zw); | ||
| 348 | AF2 p3=p0+AF2_AU2(con3.xy); | ||
| 349 | AF4 bczzR=FsrEasuRF(p0); | ||
| 350 | AF4 bczzG=FsrEasuGF(p0); | ||
| 351 | AF4 bczzB=FsrEasuBF(p0); | ||
| 352 | AF4 ijfeR=FsrEasuRF(p1); | ||
| 353 | AF4 ijfeG=FsrEasuGF(p1); | ||
| 354 | AF4 ijfeB=FsrEasuBF(p1); | ||
| 355 | AF4 klhgR=FsrEasuRF(p2); | ||
| 356 | AF4 klhgG=FsrEasuGF(p2); | ||
| 357 | AF4 klhgB=FsrEasuBF(p2); | ||
| 358 | AF4 zzonR=FsrEasuRF(p3); | ||
| 359 | AF4 zzonG=FsrEasuGF(p3); | ||
| 360 | AF4 zzonB=FsrEasuBF(p3); | ||
| 361 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 362 | // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD). | ||
| 363 | AF4 bczzL=bczzB*AF4_(0.5)+(bczzR*AF4_(0.5)+bczzG); | ||
| 364 | AF4 ijfeL=ijfeB*AF4_(0.5)+(ijfeR*AF4_(0.5)+ijfeG); | ||
| 365 | AF4 klhgL=klhgB*AF4_(0.5)+(klhgR*AF4_(0.5)+klhgG); | ||
| 366 | AF4 zzonL=zzonB*AF4_(0.5)+(zzonR*AF4_(0.5)+zzonG); | ||
| 367 | // Rename. | ||
| 368 | AF1 bL=bczzL.x; | ||
| 369 | AF1 cL=bczzL.y; | ||
| 370 | AF1 iL=ijfeL.x; | ||
| 371 | AF1 jL=ijfeL.y; | ||
| 372 | AF1 fL=ijfeL.z; | ||
| 373 | AF1 eL=ijfeL.w; | ||
| 374 | AF1 kL=klhgL.x; | ||
| 375 | AF1 lL=klhgL.y; | ||
| 376 | AF1 hL=klhgL.z; | ||
| 377 | AF1 gL=klhgL.w; | ||
| 378 | AF1 oL=zzonL.z; | ||
| 379 | AF1 nL=zzonL.w; | ||
| 380 | // Accumulate for bilinear interpolation. | ||
| 381 | AF2 dir=AF2_(0.0); | ||
| 382 | AF1 len=AF1_(0.0); | ||
| 383 | FsrEasuSetF(dir,len,pp,true, false,false,false,bL,eL,fL,gL,jL); | ||
| 384 | FsrEasuSetF(dir,len,pp,false,true ,false,false,cL,fL,gL,hL,kL); | ||
| 385 | FsrEasuSetF(dir,len,pp,false,false,true ,false,fL,iL,jL,kL,nL); | ||
| 386 | FsrEasuSetF(dir,len,pp,false,false,false,true ,gL,jL,kL,lL,oL); | ||
| 387 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 388 | // Normalize with approximation, and cleanup close to zero. | ||
| 389 | AF2 dir2=dir*dir; | ||
| 390 | AF1 dirR=dir2.x+dir2.y; | ||
| 391 | AP1 zro=dirR<AF1_(1.0/32768.0); | ||
| 392 | dirR=APrxLoRsqF1(dirR); | ||
| 393 | dirR=zro?AF1_(1.0):dirR; | ||
| 394 | dir.x=zro?AF1_(1.0):dir.x; | ||
| 395 | dir*=AF2_(dirR); | ||
| 396 | // Transform from {0 to 2} to {0 to 1} range, and shape with square. | ||
| 397 | len=len*AF1_(0.5); | ||
| 398 | len*=len; | ||
| 399 | // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}. | ||
| 400 | AF1 stretch=(dir.x*dir.x+dir.y*dir.y)*APrxLoRcpF1(max(abs(dir.x),abs(dir.y))); | ||
| 401 | // Anisotropic length after rotation, | ||
| 402 | // x := 1.0 lerp to 'stretch' on edges | ||
| 403 | // y := 1.0 lerp to 2x on edges (the y scale factor computed below is 1.0-0.5*len) | ||
| 404 | AF2 len2=AF2(AF1_(1.0)+(stretch-AF1_(1.0))*len,AF1_(1.0)+AF1_(-0.5)*len); | ||
| 405 | // Based on the amount of 'edge', | ||
| 406 | // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}. | ||
| 407 | AF1 lob=AF1_(0.5)+AF1_((1.0/4.0-0.04)-0.5)*len; | ||
| 408 | // Set distance^2 clipping point to the end of the adjustable window. | ||
| 409 | AF1 clp=APrxLoRcpF1(lob); | ||
| 410 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 411 | // Accumulation mixed with min/max of 4 nearest. | ||
| 412 | // b c | ||
| 413 | // e f g h | ||
| 414 | // i j k l | ||
| 415 | // n o | ||
| 416 | AF3 min4=min(AMin3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),AF3(ijfeR.y,ijfeG.y,ijfeB.y)), | ||
| 417 | AF3(klhgR.x,klhgG.x,klhgB.x)); | ||
| 418 | AF3 max4=max(AMax3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),AF3(ijfeR.y,ijfeG.y,ijfeB.y)), | ||
| 419 | AF3(klhgR.x,klhgG.x,klhgB.x)); | ||
| 420 | // Accumulation. | ||
| 421 | AF3 aC=AF3_(0.0); | ||
| 422 | AF1 aW=AF1_(0.0); | ||
| 423 | FsrEasuTapF(aC,aW,AF2( 0.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.x,bczzG.x,bczzB.x)); // b | ||
| 424 | FsrEasuTapF(aC,aW,AF2( 1.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.y,bczzG.y,bczzB.y)); // c | ||
| 425 | FsrEasuTapF(aC,aW,AF2(-1.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.x,ijfeG.x,ijfeB.x)); // i | ||
| 426 | FsrEasuTapF(aC,aW,AF2( 0.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.y,ijfeG.y,ijfeB.y)); // j | ||
| 427 | FsrEasuTapF(aC,aW,AF2( 0.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.z,ijfeG.z,ijfeB.z)); // f | ||
| 428 | FsrEasuTapF(aC,aW,AF2(-1.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.w,ijfeG.w,ijfeB.w)); // e | ||
| 429 | FsrEasuTapF(aC,aW,AF2( 1.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.x,klhgG.x,klhgB.x)); // k | ||
| 430 | FsrEasuTapF(aC,aW,AF2( 2.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.y,klhgG.y,klhgB.y)); // l | ||
| 431 | FsrEasuTapF(aC,aW,AF2( 2.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.z,klhgG.z,klhgB.z)); // h | ||
| 432 | FsrEasuTapF(aC,aW,AF2( 1.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.w,klhgG.w,klhgB.w)); // g | ||
| 433 | FsrEasuTapF(aC,aW,AF2( 1.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.z,zzonG.z,zzonB.z)); // o | ||
| 434 | FsrEasuTapF(aC,aW,AF2( 0.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.w,zzonG.w,zzonB.w)); // n | ||
| 435 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 436 | // Normalize and dering: divide by the accumulated weight, then clamp to the min/max of the 4 nearest taps. | ||
| 437 | pix=min(max4,max(min4,aC*AF3_(ARcpF1(aW))));} | ||
| 438 | #endif | ||
| 439 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 440 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 441 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 442 | //============================================================================================================================== | ||
| 443 | // PACKED 16-BIT VERSION | ||
| 444 | //============================================================================================================================== | ||
| 445 | #if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_EASU_H) | ||
| 446 | // Input callback prototypes, need to be implemented by calling shader: one gather4 per color channel (R, G, B) at texture coordinate 'p', returned as AH4. | ||
| 447 | AH4 FsrEasuRH(AF2 p); | ||
| 448 | AH4 FsrEasuGH(AF2 p); | ||
| 449 | AH4 FsrEasuBH(AF2 p); | ||
| 450 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 451 | // This runs 2 taps in parallel, one per AH2 component; the math mirrors FsrEasuTapF() with per-channel accumulators aCR/aCG/aCB. | ||
| 452 | void FsrEasuTapH( | ||
| 453 | inout AH2 aCR,inout AH2 aCG,inout AH2 aCB, | ||
| 454 | inout AH2 aW, | ||
| 455 | AH2 offX,AH2 offY, | ||
| 456 | AH2 dir, | ||
| 457 | AH2 len, | ||
| 458 | AH1 lob, | ||
| 459 | AH1 clp, | ||
| 460 | AH2 cR,AH2 cG,AH2 cB){ | ||
| 461 | AH2 vX,vY; | ||
| 462 | vX=offX* dir.xx +offY*dir.yy; | ||
| 463 | vY=offX*(-dir.yy)+offY*dir.xx; | ||
| 464 | vX*=len.x;vY*=len.y; | ||
| 465 | AH2 d2=vX*vX+vY*vY; | ||
| 466 | d2=min(d2,AH2_(clp)); | ||
| 467 | AH2 wB=AH2_(2.0/5.0)*d2+AH2_(-1.0); | ||
| 468 | AH2 wA=AH2_(lob)*d2+AH2_(-1.0); | ||
| 469 | wB*=wB; | ||
| 470 | wA*=wA; | ||
| 471 | wB=AH2_(25.0/16.0)*wB+AH2_(-(25.0/16.0-1.0)); | ||
| 472 | AH2 w=wB*wA; | ||
| 473 | aCR+=cR*w;aCG+=cG*w;aCB+=cB*w;aW+=w;} | ||
| 474 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 475 | // This runs 2 taps in parallel: 'biST' selects the top pair of bilinear weights (1-pp.y row), 'biUV' the bottom pair (pp.y row). | ||
| 476 | void FsrEasuSetH( | ||
| 477 | inout AH2 dirPX,inout AH2 dirPY, | ||
| 478 | inout AH2 lenP, | ||
| 479 | AH2 pp, | ||
| 480 | AP1 biST,AP1 biUV, | ||
| 481 | AH2 lA,AH2 lB,AH2 lC,AH2 lD,AH2 lE){ | ||
| 482 | AH2 w = AH2_(0.0); | ||
| 483 | if(biST)w=(AH2(1.0,0.0)+AH2(-pp.x,pp.x))*AH2_(AH1_(1.0)-pp.y); | ||
| 484 | if(biUV)w=(AH2(1.0,0.0)+AH2(-pp.x,pp.x))*AH2_( pp.y); | ||
| 485 | // ABS is not free in the packed FP16 path. | ||
| 486 | AH2 dc=lD-lC; | ||
| 487 | AH2 cb=lC-lB; | ||
| 488 | AH2 lenX=max(abs(dc),abs(cb)); | ||
| 489 | lenX=ARcpH2(lenX); | ||
| 490 | AH2 dirX=lD-lB; | ||
| 491 | dirPX+=dirX*w; | ||
| 492 | lenX=ASatH2(abs(dirX)*lenX); | ||
| 493 | lenX*=lenX; | ||
| 494 | lenP+=lenX*w; | ||
| 495 | AH2 ec=lE-lC; | ||
| 496 | AH2 ca=lC-lA; | ||
| 497 | AH2 lenY=max(abs(ec),abs(ca)); | ||
| 498 | lenY=ARcpH2(lenY); | ||
| 499 | AH2 dirY=lE-lA; | ||
| 500 | dirPY+=dirY*w; | ||
| 501 | lenY=ASatH2(abs(dirY)*lenY); | ||
| 502 | lenY*=lenY; | ||
| 503 | lenP+=lenY*w;} | ||
| 504 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 505 | void FsrEasuH( | ||
| 506 | out AH3 pix, | ||
| 507 | AU2 ip, | ||
| 508 | AU4 con0, | ||
| 509 | AU4 con1, | ||
| 510 | AU4 con2, | ||
| 511 | AU4 con3){ | ||
| 512 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 513 | AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw); | ||
| 514 | AF2 fp=floor(pp); | ||
| 515 | pp-=fp; | ||
| 516 | AH2 ppp=AH2(pp); | ||
| 517 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 518 | AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw); | ||
| 519 | AF2 p1=p0+AF2_AU2(con2.xy); | ||
| 520 | AF2 p2=p0+AF2_AU2(con2.zw); | ||
| 521 | AF2 p3=p0+AF2_AU2(con3.xy); | ||
| 522 | AH4 bczzR=FsrEasuRH(p0); | ||
| 523 | AH4 bczzG=FsrEasuGH(p0); | ||
| 524 | AH4 bczzB=FsrEasuBH(p0); | ||
| 525 | AH4 ijfeR=FsrEasuRH(p1); | ||
| 526 | AH4 ijfeG=FsrEasuGH(p1); | ||
| 527 | AH4 ijfeB=FsrEasuBH(p1); | ||
| 528 | AH4 klhgR=FsrEasuRH(p2); | ||
| 529 | AH4 klhgG=FsrEasuGH(p2); | ||
| 530 | AH4 klhgB=FsrEasuBH(p2); | ||
| 531 | AH4 zzonR=FsrEasuRH(p3); | ||
| 532 | AH4 zzonG=FsrEasuGH(p3); | ||
| 533 | AH4 zzonB=FsrEasuBH(p3); | ||
| 534 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 535 | AH4 bczzL=bczzB*AH4_(0.5)+(bczzR*AH4_(0.5)+bczzG); | ||
| 536 | AH4 ijfeL=ijfeB*AH4_(0.5)+(ijfeR*AH4_(0.5)+ijfeG); | ||
| 537 | AH4 klhgL=klhgB*AH4_(0.5)+(klhgR*AH4_(0.5)+klhgG); | ||
| 538 | AH4 zzonL=zzonB*AH4_(0.5)+(zzonR*AH4_(0.5)+zzonG); | ||
| 539 | AH1 bL=bczzL.x; | ||
| 540 | AH1 cL=bczzL.y; | ||
| 541 | AH1 iL=ijfeL.x; | ||
| 542 | AH1 jL=ijfeL.y; | ||
| 543 | AH1 fL=ijfeL.z; | ||
| 544 | AH1 eL=ijfeL.w; | ||
| 545 | AH1 kL=klhgL.x; | ||
| 546 | AH1 lL=klhgL.y; | ||
| 547 | AH1 hL=klhgL.z; | ||
| 548 | AH1 gL=klhgL.w; | ||
| 549 | AH1 oL=zzonL.z; | ||
| 550 | AH1 nL=zzonL.w; | ||
| 551 | // This part is different, accumulating 2 taps in parallel (the two halves are summed right after the FsrEasuSetH() calls). | ||
| 552 | AH2 dirPX=AH2_(0.0); | ||
| 553 | AH2 dirPY=AH2_(0.0); | ||
| 554 | AH2 lenP=AH2_(0.0); | ||
| 555 | FsrEasuSetH(dirPX,dirPY,lenP,ppp,true, false,AH2(bL,cL),AH2(eL,fL),AH2(fL,gL),AH2(gL,hL),AH2(jL,kL)); | ||
| 556 | FsrEasuSetH(dirPX,dirPY,lenP,ppp,false,true ,AH2(fL,gL),AH2(iL,jL),AH2(jL,kL),AH2(kL,lL),AH2(nL,oL)); | ||
| 557 | AH2 dir=AH2(dirPX.r+dirPX.g,dirPY.r+dirPY.g); | ||
| 558 | AH1 len=lenP.r+lenP.g; | ||
| 559 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 560 | AH2 dir2=dir*dir; | ||
| 561 | AH1 dirR=dir2.x+dir2.y; | ||
| 562 | AP1 zro=dirR<AH1_(1.0/32768.0); | ||
| 563 | dirR=APrxLoRsqH1(dirR); | ||
| 564 | dirR=zro?AH1_(1.0):dirR; | ||
| 565 | dir.x=zro?AH1_(1.0):dir.x; | ||
| 566 | dir*=AH2_(dirR); | ||
| 567 | len=len*AH1_(0.5); | ||
| 568 | len*=len; | ||
| 569 | AH1 stretch=(dir.x*dir.x+dir.y*dir.y)*APrxLoRcpH1(max(abs(dir.x),abs(dir.y))); | ||
| 570 | AH2 len2=AH2(AH1_(1.0)+(stretch-AH1_(1.0))*len,AH1_(1.0)+AH1_(-0.5)*len); | ||
| 571 | AH1 lob=AH1_(0.5)+AH1_((1.0/4.0-0.04)-0.5)*len; | ||
| 572 | AH1 clp=APrxLoRcpH1(lob); | ||
| 573 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 574 | // FP16 is different, using packed trick to do min and max in same operation: '.x' takes the max of the negated taps (i.e. -min), '.y' the max. | ||
| 575 | AH2 bothR=max(max(AH2(-ijfeR.z,ijfeR.z),AH2(-klhgR.w,klhgR.w)),max(AH2(-ijfeR.y,ijfeR.y),AH2(-klhgR.x,klhgR.x))); | ||
| 576 | AH2 bothG=max(max(AH2(-ijfeG.z,ijfeG.z),AH2(-klhgG.w,klhgG.w)),max(AH2(-ijfeG.y,ijfeG.y),AH2(-klhgG.x,klhgG.x))); | ||
| 577 | AH2 bothB=max(max(AH2(-ijfeB.z,ijfeB.z),AH2(-klhgB.w,klhgB.w)),max(AH2(-ijfeB.y,ijfeB.y),AH2(-klhgB.x,klhgB.x))); | ||
| 578 | // This part is different for FP16, working pairs of taps at a time. | ||
| 579 | AH2 pR=AH2_(0.0); | ||
| 580 | AH2 pG=AH2_(0.0); | ||
| 581 | AH2 pB=AH2_(0.0); | ||
| 582 | AH2 pW=AH2_(0.0); | ||
| 583 | FsrEasuTapH(pR,pG,pB,pW,AH2( 0.0, 1.0)-ppp.xx,AH2(-1.0,-1.0)-ppp.yy,dir,len2,lob,clp,bczzR.xy,bczzG.xy,bczzB.xy); | ||
| 584 | FsrEasuTapH(pR,pG,pB,pW,AH2(-1.0, 0.0)-ppp.xx,AH2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,ijfeR.xy,ijfeG.xy,ijfeB.xy); | ||
| 585 | FsrEasuTapH(pR,pG,pB,pW,AH2( 0.0,-1.0)-ppp.xx,AH2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,ijfeR.zw,ijfeG.zw,ijfeB.zw); | ||
| 586 | FsrEasuTapH(pR,pG,pB,pW,AH2( 1.0, 2.0)-ppp.xx,AH2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,klhgR.xy,klhgG.xy,klhgB.xy); | ||
| 587 | FsrEasuTapH(pR,pG,pB,pW,AH2( 2.0, 1.0)-ppp.xx,AH2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,klhgR.zw,klhgG.zw,klhgB.zw); | ||
| 588 | FsrEasuTapH(pR,pG,pB,pW,AH2( 1.0, 0.0)-ppp.xx,AH2( 2.0, 2.0)-ppp.yy,dir,len2,lob,clp,zzonR.zw,zzonG.zw,zzonB.zw); | ||
| 589 | AH3 aC=AH3(pR.x+pR.y,pG.x+pG.y,pB.x+pB.y); | ||
| 590 | AH1 aW=pW.x+pW.y; | ||
| 591 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 592 | // Slightly different for FP16 version due to combined min and max: clamp between the negated '.x' (the min) and '.y' (the max). | ||
| 593 | pix=min(AH3(bothR.y,bothG.y,bothB.y),max(-AH3(bothR.x,bothG.x,bothB.x),aC*AH3_(ARcpH1(aW))));} | ||
| 594 | #endif | ||
| 595 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 596 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 597 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 598 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 599 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 600 | //============================================================================================================================== | ||
| 601 | // | ||
| 602 | // FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING | ||
| 603 | // | ||
| 604 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 605 | // CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness. | ||
| 606 | // RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping. | ||
| 607 | // RCAS also has a built in process to limit sharpening of what it detects as possible noise. | ||
| 608 | // The RCAS sharpener does not support scaling, as it should be applied after EASU scaling. | ||
| 609 | // Pass EASU output straight into RCAS, no color conversions necessary. | ||
| 610 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 611 | // RCAS is based on the following logic. | ||
| 612 | // RCAS uses a 5 tap filter in a cross pattern (same as CAS), | ||
| 613 | // w n | ||
| 614 | // w 1 w for taps w m e | ||
| 615 | // w s | ||
| 616 | // Where 'w' is the negative lobe weight. | ||
| 617 | // output = (w*(n+e+w+s)+m)/(4*w+1) | ||
| 618 | // RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range, | ||
| 619 | // 0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s) | ||
| 620 | // 1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1) | ||
| 621 | // Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount. | ||
| 622 | // This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues. | ||
| 623 | // So RCAS uses 4x the maximum and 4x the minimum (depending on equation) in place of the individual taps. | ||
| 624 | // As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation. | ||
| 625 | // This stabilizes RCAS. | ||
| 626 | // RCAS does a simple highpass which is normalized against the local contrast then shaped, | ||
| 627 | // 0.25 | ||
| 628 | // 0.25 -1 0.25 | ||
| 629 | // 0.25 | ||
| 630 | // This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges. | ||
| 631 | // | ||
| 632 | // GLSL example for the required callbacks : | ||
| 633 | // | ||
| 634 | // AH4 FsrRcasLoadH(ASW2 p){return AH4(imageLoad(imgSrc,ASU2(p)));} | ||
| 635 | // void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b) | ||
| 636 | // { | ||
| 637 | // //do any simple input color conversions here or leave empty if none needed | ||
| 638 | // } | ||
| 639 | // | ||
| 640 | // FsrRcasCon needs to be called from the CPU or GPU to set up constants. | ||
| 641 | // Including a GPU example here, the 'con' value would be stored out to a constant buffer. | ||
| 642 | // | ||
| 643 | // AU4 con; | ||
| 644 | // FsrRcasCon(con, | ||
| 645 | // 0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. | ||
| 646 | // --------------- | ||
| 647 | // RCAS sharpening supports a CAS-like pass-through alpha via, | ||
| 648 | // #define FSR_RCAS_PASSTHROUGH_ALPHA 1 | ||
| 649 | // RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise. | ||
| 650 | // Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define, | ||
| 651 | // #define FSR_RCAS_DENOISE 1 | ||
| 652 | //============================================================================================================================== | ||
| 653 | // This is set at the limit of providing unnatural results for sharpening (evaluates to 0.1875). | ||
| 654 | #define FSR_RCAS_LIMIT (0.25-(1.0/16.0)) | ||
| 655 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 656 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 657 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 658 | //============================================================================================================================== | ||
| 659 | // CONSTANT SETUP | ||
| 660 | //============================================================================================================================== | ||
| 661 | // Call to setup required constant values (works on CPU or GPU). Writes the linear sharpness to con[0] (via AU1_AF1) and to con[1] (via AU1_AH2_AF2); con[2] and con[3] are zeroed. | ||
| 662 | A_STATIC void FsrRcasCon( | ||
| 663 | outAU4 con, | ||
| 664 | // The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. | ||
| 665 | AF1 sharpness){ | ||
| 666 | // Transform from stops to linear value. | ||
| 667 | sharpness=AExp2F1(-sharpness); | ||
| 668 | varAF2(hSharp)=initAF2(sharpness,sharpness); | ||
| 669 | con[0]=AU1_AF1(sharpness); | ||
| 670 | con[1]=AU1_AH2_AF2(hSharp); | ||
| 671 | con[2]=0; | ||
| 672 | con[3]=0;} | ||
| 673 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 674 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 675 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 676 | //============================================================================================================================== | ||
| 677 | // NON-PACKED 32-BIT VERSION | ||
| 678 | //============================================================================================================================== | ||
| 679 | #if defined(A_GPU)&&defined(FSR_RCAS_F) | ||
| 680 | // Input callback prototypes that need to be implemented by calling shader: FsrRcasLoadF loads the pixel at integer position 'p', FsrRcasInputF is the optional in-place input color transform. | ||
| 681 | AF4 FsrRcasLoadF(ASU2 p); | ||
| 682 | void FsrRcasInputF(inout AF1 r,inout AF1 g,inout AF1 b); | ||
| 683 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 684 | void FsrRcasF( | ||
| 685 | out AF1 pixR, // Output values, non-vector so port between FsrRcasF() and FsrRcasH() is easy. | ||
| 686 | out AF1 pixG, | ||
| 687 | out AF1 pixB, | ||
| 688 | #ifdef FSR_RCAS_PASSTHROUGH_ALPHA | ||
| 689 | out AF1 pixA, | ||
| 690 | #endif | ||
| 691 | AU2 ip, // Integer pixel position in output. | ||
| 692 | AU4 con){ // Constant generated by FsrRcasCon(). | ||
| 693 | // Algorithm uses minimal 3x3 pixel neighborhood (only the center 'e' and its 4 direct neighbors are loaded). | ||
| 694 | // b | ||
| 695 | // d e f | ||
| 696 | // h | ||
| 697 | ASU2 sp=ASU2(ip); | ||
| 698 | AF3 b=FsrRcasLoadF(sp+ASU2( 0,-1)).rgb; | ||
| 699 | AF3 d=FsrRcasLoadF(sp+ASU2(-1, 0)).rgb; | ||
| 700 | #ifdef FSR_RCAS_PASSTHROUGH_ALPHA | ||
| 701 | AF4 ee=FsrRcasLoadF(sp); | ||
| 702 | AF3 e=ee.rgb;pixA=ee.a; | ||
| 703 | #else | ||
| 704 | AF3 e=FsrRcasLoadF(sp).rgb; | ||
| 705 | #endif | ||
| 706 | AF3 f=FsrRcasLoadF(sp+ASU2( 1, 0)).rgb; | ||
| 707 | AF3 h=FsrRcasLoadF(sp+ASU2( 0, 1)).rgb; | ||
| 708 | // Rename (32-bit) or regroup (16-bit). | ||
| 709 | AF1 bR=b.r; | ||
| 710 | AF1 bG=b.g; | ||
| 711 | AF1 bB=b.b; | ||
| 712 | AF1 dR=d.r; | ||
| 713 | AF1 dG=d.g; | ||
| 714 | AF1 dB=d.b; | ||
| 715 | AF1 eR=e.r; | ||
| 716 | AF1 eG=e.g; | ||
| 717 | AF1 eB=e.b; | ||
| 718 | AF1 fR=f.r; | ||
| 719 | AF1 fG=f.g; | ||
| 720 | AF1 fB=f.b; | ||
| 721 | AF1 hR=h.r; | ||
| 722 | AF1 hG=h.g; | ||
| 723 | AF1 hB=h.b; | ||
| 724 | // Run optional input transform (the FsrRcasInputF() callback supplied by the calling shader) on each of the 5 taps. | ||
| 725 | FsrRcasInputF(bR,bG,bB); | ||
| 726 | FsrRcasInputF(dR,dG,dB); | ||
| 727 | FsrRcasInputF(eR,eG,eB); | ||
| 728 | FsrRcasInputF(fR,fG,fB); | ||
| 729 | FsrRcasInputF(hR,hG,hB); | ||
| 730 | // Luma times 2, computed as 0.5*R + G + 0.5*B. | ||
| 731 | AF1 bL=bB*AF1_(0.5)+(bR*AF1_(0.5)+bG); | ||
| 732 | AF1 dL=dB*AF1_(0.5)+(dR*AF1_(0.5)+dG); | ||
| 733 | AF1 eL=eB*AF1_(0.5)+(eR*AF1_(0.5)+eG); | ||
| 734 | AF1 fL=fB*AF1_(0.5)+(fR*AF1_(0.5)+fG); | ||
| 735 | AF1 hL=hB*AF1_(0.5)+(hR*AF1_(0.5)+hG); | ||
| 736 | // Noise detection: |mean luma of the 4 ring taps - center luma| divided by the luma range of all 5 taps, saturated, then remapped to a {1.0 down to 0.5} attenuation factor. | ||
| 737 | AF1 nz=AF1_(0.25)*bL+AF1_(0.25)*dL+AF1_(0.25)*fL+AF1_(0.25)*hL-eL; | ||
| 738 | nz=ASatF1(abs(nz)*APrxMedRcpF1(AMax3F1(AMax3F1(bL,dL,eL),fL,hL)-AMin3F1(AMin3F1(bL,dL,eL),fL,hL))); | ||
| 739 | nz=AF1_(-0.5)*nz+AF1_(1.0); | ||
| 740 | // Min and max of ring (the 4 neighbors b, d, f, h; the center is excluded). | ||
| 741 | AF1 mn4R=min(AMin3F1(bR,dR,fR),hR); | ||
| 742 | AF1 mn4G=min(AMin3F1(bG,dG,fG),hG); | ||
| 743 | AF1 mn4B=min(AMin3F1(bB,dB,fB),hB); | ||
| 744 | AF1 mx4R=max(AMax3F1(bR,dR,fR),hR); | ||
| 745 | AF1 mx4G=max(AMax3F1(bG,dG,fG),hG); | ||
| 746 | AF1 mx4B=max(AMax3F1(bB,dB,fB),hB); | ||
| 747 | // Immediate constants for peak range. | ||
| 748 | AF2 peakC=AF2(1.0,-1.0*4.0); | ||
| 749 | // Limiters, these need to be high precision RCPs. | ||
| 750 | AF1 hitMinR=mn4R*ARcpF1(AF1_(4.0)*mx4R); | ||
| 751 | AF1 hitMinG=mn4G*ARcpF1(AF1_(4.0)*mx4G); | ||
| 752 | AF1 hitMinB=mn4B*ARcpF1(AF1_(4.0)*mx4B); | ||
| 753 | AF1 hitMaxR=(peakC.x-mx4R)*ARcpF1(AF1_(4.0)*mn4R+peakC.y); | ||
| 754 | AF1 hitMaxG=(peakC.x-mx4G)*ARcpF1(AF1_(4.0)*mn4G+peakC.y); | ||
| 755 | AF1 hitMaxB=(peakC.x-mx4B)*ARcpF1(AF1_(4.0)*mn4B+peakC.y); | ||
| 756 | AF1 lobeR=max(-hitMinR,hitMaxR); | ||
| 757 | AF1 lobeG=max(-hitMinG,hitMaxG); | ||
| 758 | AF1 lobeB=max(-hitMinB,hitMaxB); | ||
| 759 | AF1 lobe=max(AF1_(-FSR_RCAS_LIMIT),min(AMax3F1(lobeR,lobeG,lobeB),AF1_(0.0)))*AF1_AU1(con.x); | ||
| 760 | // Apply noise removal (only when FSR_RCAS_DENOISE is defined). | ||
| 761 | #ifdef FSR_RCAS_DENOISE | ||
| 762 | lobe*=nz; | ||
| 763 | #endif | ||
| 764 | // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. | ||
| 765 | AF1 rcpL=APrxMedRcpF1(AF1_(4.0)*lobe+AF1_(1.0)); | ||
| 766 | pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; | ||
| 767 | pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; | ||
| 768 | pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL; | ||
| 769 | return;} | ||
| 770 | #endif | ||
| 771 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 772 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 773 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 774 | //============================================================================================================================== | ||
| 775 | // NON-PACKED 16-BIT VERSION | ||
| 776 | //============================================================================================================================== | ||
| 777 | #if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_H) | ||
| 778 | // Input callback prototypes that need to be implemented by calling shader | ||
| 779 | AH4 FsrRcasLoadH(ASW2 p); | ||
| 780 | void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b); | ||
| 781 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 782 | void FsrRcasH( | ||
| 783 | out AH1 pixR, // Output values, non-vector so port between FsrRcasF() and FsrRcasH() is easy. | ||
| 784 | out AH1 pixG, | ||
| 785 | out AH1 pixB, | ||
| 786 | #ifdef FSR_RCAS_PASSTHROUGH_ALPHA | ||
| 787 | out AH1 pixA, | ||
| 788 | #endif | ||
| 789 | AU2 ip, // Integer pixel position in output. | ||
| 790 | AU4 con){ // Constant generated by FsrRcasCon(). | ||
| 791 | // Sharpening algorithm uses minimal 3x3 pixel neighborhood (only the center 'e' and its 4 direct neighbors are loaded). | ||
| 792 | // b | ||
| 793 | // d e f | ||
| 794 | // h | ||
| 795 | ASW2 sp=ASW2(ip); | ||
| 796 | AH3 b=FsrRcasLoadH(sp+ASW2( 0,-1)).rgb; | ||
| 797 | AH3 d=FsrRcasLoadH(sp+ASW2(-1, 0)).rgb; | ||
| 798 | #ifdef FSR_RCAS_PASSTHROUGH_ALPHA | ||
| 799 | AH4 ee=FsrRcasLoadH(sp); | ||
| 800 | AH3 e=ee.rgb;pixA=ee.a; | ||
| 801 | #else | ||
| 802 | AH3 e=FsrRcasLoadH(sp).rgb; | ||
| 803 | #endif | ||
| 804 | AH3 f=FsrRcasLoadH(sp+ASW2( 1, 0)).rgb; | ||
| 805 | AH3 h=FsrRcasLoadH(sp+ASW2( 0, 1)).rgb; | ||
| 806 | // Rename (32-bit) or regroup (16-bit). | ||
| 807 | AH1 bR=b.r; | ||
| 808 | AH1 bG=b.g; | ||
| 809 | AH1 bB=b.b; | ||
| 810 | AH1 dR=d.r; | ||
| 811 | AH1 dG=d.g; | ||
| 812 | AH1 dB=d.b; | ||
| 813 | AH1 eR=e.r; | ||
| 814 | AH1 eG=e.g; | ||
| 815 | AH1 eB=e.b; | ||
| 816 | AH1 fR=f.r; | ||
| 817 | AH1 fG=f.g; | ||
| 818 | AH1 fB=f.b; | ||
| 819 | AH1 hR=h.r; | ||
| 820 | AH1 hG=h.g; | ||
| 821 | AH1 hB=h.b; | ||
| 822 | // Run optional input transform (the FsrRcasInputH() callback supplied by the calling shader) on each of the 5 taps. | ||
| 823 | FsrRcasInputH(bR,bG,bB); | ||
| 824 | FsrRcasInputH(dR,dG,dB); | ||
| 825 | FsrRcasInputH(eR,eG,eB); | ||
| 826 | FsrRcasInputH(fR,fG,fB); | ||
| 827 | FsrRcasInputH(hR,hG,hB); | ||
| 828 | // Luma times 2, computed as 0.5*R + G + 0.5*B. | ||
| 829 | AH1 bL=bB*AH1_(0.5)+(bR*AH1_(0.5)+bG); | ||
| 830 | AH1 dL=dB*AH1_(0.5)+(dR*AH1_(0.5)+dG); | ||
| 831 | AH1 eL=eB*AH1_(0.5)+(eR*AH1_(0.5)+eG); | ||
| 832 | AH1 fL=fB*AH1_(0.5)+(fR*AH1_(0.5)+fG); | ||
| 833 | AH1 hL=hB*AH1_(0.5)+(hR*AH1_(0.5)+hG); | ||
| 834 | // Noise detection: |mean luma of the 4 ring taps - center luma| divided by the luma range of all 5 taps, saturated, then remapped to a {1.0 down to 0.5} attenuation factor. | ||
| 835 | AH1 nz=AH1_(0.25)*bL+AH1_(0.25)*dL+AH1_(0.25)*fL+AH1_(0.25)*hL-eL; | ||
| 836 | nz=ASatH1(abs(nz)*APrxMedRcpH1(AMax3H1(AMax3H1(bL,dL,eL),fL,hL)-AMin3H1(AMin3H1(bL,dL,eL),fL,hL))); | ||
| 837 | nz=AH1_(-0.5)*nz+AH1_(1.0); | ||
| 838 | // Min and max of ring (the 4 neighbors b, d, f, h; the center is excluded). | ||
| 839 | AH1 mn4R=min(AMin3H1(bR,dR,fR),hR); | ||
| 840 | AH1 mn4G=min(AMin3H1(bG,dG,fG),hG); | ||
| 841 | AH1 mn4B=min(AMin3H1(bB,dB,fB),hB); | ||
| 842 | AH1 mx4R=max(AMax3H1(bR,dR,fR),hR); | ||
| 843 | AH1 mx4G=max(AMax3H1(bG,dG,fG),hG); | ||
| 844 | AH1 mx4B=max(AMax3H1(bB,dB,fB),hB); | ||
| 845 | // Immediate constants for peak range. | ||
| 846 | AH2 peakC=AH2(1.0,-1.0*4.0); | ||
| 847 | // Limiters, these need to be high precision RCPs. | ||
| 848 | AH1 hitMinR=mn4R*ARcpH1(AH1_(4.0)*mx4R); | ||
| 849 | AH1 hitMinG=mn4G*ARcpH1(AH1_(4.0)*mx4G); | ||
| 850 | AH1 hitMinB=mn4B*ARcpH1(AH1_(4.0)*mx4B); | ||
| 851 | AH1 hitMaxR=(peakC.x-mx4R)*ARcpH1(AH1_(4.0)*mn4R+peakC.y); | ||
| 852 | AH1 hitMaxG=(peakC.x-mx4G)*ARcpH1(AH1_(4.0)*mn4G+peakC.y); | ||
| 853 | AH1 hitMaxB=(peakC.x-mx4B)*ARcpH1(AH1_(4.0)*mn4B+peakC.y); | ||
| 854 | AH1 lobeR=max(-hitMinR,hitMaxR); | ||
| 855 | AH1 lobeG=max(-hitMinG,hitMaxG); | ||
| 856 | AH1 lobeB=max(-hitMinB,hitMaxB); | ||
| 857 | AH1 lobe=max(AH1_(-FSR_RCAS_LIMIT),min(AMax3H1(lobeR,lobeG,lobeB),AH1_(0.0)))*AH2_AU1(con.y).x; | ||
| 858 | // Apply noise removal (only when FSR_RCAS_DENOISE is defined). | ||
| 859 | #ifdef FSR_RCAS_DENOISE | ||
| 860 | lobe*=nz; | ||
| 861 | #endif | ||
| 862 | // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. | ||
| 863 | AH1 rcpL=APrxMedRcpH1(AH1_(4.0)*lobe+AH1_(1.0)); | ||
| 864 | pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; | ||
| 865 | pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; | ||
| 866 | pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;} | ||
| 867 | #endif | ||
| 868 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 869 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 870 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 871 | //============================================================================================================================== | ||
| 872 | // PACKED 16-BIT VERSION | ||
| 873 | //============================================================================================================================== | ||
| 874 | #if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_HX2) | ||
| 875 | // Input callback prototypes that need to be implemented by the calling shader | ||
| 876 | AH4 FsrRcasLoadHx2(ASW2 p); | ||
| 877 | void FsrRcasInputHx2(inout AH2 r,inout AH2 g,inout AH2 b); | ||
| 878 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 879 | // Can be used to convert from packed Structures of Arrays to Arrays of Structures for store: pix0.rgb takes the .x lanes and pix1.rgb takes the .y lanes of pixR/pixG/pixB. | ||
| 880 | void FsrRcasDepackHx2(out AH4 pix0,out AH4 pix1,AH2 pixR,AH2 pixG,AH2 pixB){ | ||
| 881 | #ifdef A_HLSL | ||
| 882 | // Invoke a slower path for DX only, since it won't allow uninitialized values: alpha is zeroed here, and is left unwritten when A_HLSL is not defined. | ||
| 883 | pix0.a=pix1.a=0.0; | ||
| 884 | #endif | ||
| 885 | pix0.rgb=AH3(pixR.x,pixG.x,pixB.x); | ||
| 886 | pix1.rgb=AH3(pixR.y,pixG.y,pixB.y);} | ||
| 887 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 888 | void FsrRcasHx2( | ||
| 889 | // Output values are for 2 8x8 tiles in a 16x8 region. | ||
| 890 | // pix<R,G,B>.x = left 8x8 tile | ||
| 891 | // pix<R,G,B>.y = right 8x8 tile | ||
| 892 | // This enables later processing to easily be packed as well. | ||
| 893 | out AH2 pixR, | ||
| 894 | out AH2 pixG, | ||
| 895 | out AH2 pixB, | ||
| 896 | #ifdef FSR_RCAS_PASSTHROUGH_ALPHA | ||
| 897 | out AH2 pixA, | ||
| 898 | #endif | ||
| 899 | AU2 ip, // Integer pixel position in output. | ||
| 900 | AU4 con){ // Constant generated by FsrRcasCon(). | ||
| 901 | // Sharpening (no scaling) uses a minimal 3x3 pixel neighborhood, loaded once at 'ip' (suffix 0) and once at 'ip+{8,0}' (suffix 1). | ||
| 902 | ASW2 sp0=ASW2(ip); | ||
| 903 | AH3 b0=FsrRcasLoadHx2(sp0+ASW2( 0,-1)).rgb; | ||
| 904 | AH3 d0=FsrRcasLoadHx2(sp0+ASW2(-1, 0)).rgb; | ||
| 905 | #ifdef FSR_RCAS_PASSTHROUGH_ALPHA | ||
| 906 | AH4 ee0=FsrRcasLoadHx2(sp0); | ||
| 907 | AH3 e0=ee0.rgb;pixA.r=ee0.a; | ||
| 908 | #else | ||
| 909 | AH3 e0=FsrRcasLoadHx2(sp0).rgb; | ||
| 910 | #endif | ||
| 911 | AH3 f0=FsrRcasLoadHx2(sp0+ASW2( 1, 0)).rgb; | ||
| 912 | AH3 h0=FsrRcasLoadHx2(sp0+ASW2( 0, 1)).rgb; | ||
| 913 | ASW2 sp1=sp0+ASW2(8,0); | ||
| 914 | AH3 b1=FsrRcasLoadHx2(sp1+ASW2( 0,-1)).rgb; | ||
| 915 | AH3 d1=FsrRcasLoadHx2(sp1+ASW2(-1, 0)).rgb; | ||
| 916 | #ifdef FSR_RCAS_PASSTHROUGH_ALPHA | ||
| 917 | AH4 ee1=FsrRcasLoadHx2(sp1); | ||
| 918 | AH3 e1=ee1.rgb;pixA.g=ee1.a; | ||
| 919 | #else | ||
| 920 | AH3 e1=FsrRcasLoadHx2(sp1).rgb; | ||
| 921 | #endif | ||
| 922 | AH3 f1=FsrRcasLoadHx2(sp1+ASW2( 1, 0)).rgb; | ||
| 923 | AH3 h1=FsrRcasLoadHx2(sp1+ASW2( 0, 1)).rgb; | ||
| 924 | // Arrays of Structures to Structures of Arrays conversion (.x lane = pixel at sp0, .y lane = pixel at sp1). | ||
| 925 | AH2 bR=AH2(b0.r,b1.r); | ||
| 926 | AH2 bG=AH2(b0.g,b1.g); | ||
| 927 | AH2 bB=AH2(b0.b,b1.b); | ||
| 928 | AH2 dR=AH2(d0.r,d1.r); | ||
| 929 | AH2 dG=AH2(d0.g,d1.g); | ||
| 930 | AH2 dB=AH2(d0.b,d1.b); | ||
| 931 | AH2 eR=AH2(e0.r,e1.r); | ||
| 932 | AH2 eG=AH2(e0.g,e1.g); | ||
| 933 | AH2 eB=AH2(e0.b,e1.b); | ||
| 934 | AH2 fR=AH2(f0.r,f1.r); | ||
| 935 | AH2 fG=AH2(f0.g,f1.g); | ||
| 936 | AH2 fB=AH2(f0.b,f1.b); | ||
| 937 | AH2 hR=AH2(h0.r,h1.r); | ||
| 938 | AH2 hG=AH2(h0.g,h1.g); | ||
| 939 | AH2 hB=AH2(h0.b,h1.b); | ||
| 940 | // Run optional input transform (the FsrRcasInputHx2() callback supplied by the calling shader) on each of the 5 packed taps. | ||
| 941 | FsrRcasInputHx2(bR,bG,bB); | ||
| 942 | FsrRcasInputHx2(dR,dG,dB); | ||
| 943 | FsrRcasInputHx2(eR,eG,eB); | ||
| 944 | FsrRcasInputHx2(fR,fG,fB); | ||
| 945 | FsrRcasInputHx2(hR,hG,hB); | ||
| 946 | // Luma times 2, computed as 0.5*R + G + 0.5*B. | ||
| 947 | AH2 bL=bB*AH2_(0.5)+(bR*AH2_(0.5)+bG); | ||
| 948 | AH2 dL=dB*AH2_(0.5)+(dR*AH2_(0.5)+dG); | ||
| 949 | AH2 eL=eB*AH2_(0.5)+(eR*AH2_(0.5)+eG); | ||
| 950 | AH2 fL=fB*AH2_(0.5)+(fR*AH2_(0.5)+fG); | ||
| 951 | AH2 hL=hB*AH2_(0.5)+(hR*AH2_(0.5)+hG); | ||
| 952 | // Noise detection: |mean luma of the 4 ring taps - center luma| divided by the luma range of all 5 taps, saturated, then remapped to a {1.0 down to 0.5} attenuation factor. | ||
| 953 | AH2 nz=AH2_(0.25)*bL+AH2_(0.25)*dL+AH2_(0.25)*fL+AH2_(0.25)*hL-eL; | ||
| 954 | nz=ASatH2(abs(nz)*APrxMedRcpH2(AMax3H2(AMax3H2(bL,dL,eL),fL,hL)-AMin3H2(AMin3H2(bL,dL,eL),fL,hL))); | ||
| 955 | nz=AH2_(-0.5)*nz+AH2_(1.0); | ||
| 956 | // Min and max of ring (the 4 neighbors b, d, f, h; the center is excluded). | ||
| 957 | AH2 mn4R=min(AMin3H2(bR,dR,fR),hR); | ||
| 958 | AH2 mn4G=min(AMin3H2(bG,dG,fG),hG); | ||
| 959 | AH2 mn4B=min(AMin3H2(bB,dB,fB),hB); | ||
| 960 | AH2 mx4R=max(AMax3H2(bR,dR,fR),hR); | ||
| 961 | AH2 mx4G=max(AMax3H2(bG,dG,fG),hG); | ||
| 962 | AH2 mx4B=max(AMax3H2(bB,dB,fB),hB); | ||
| 963 | // Immediate constants for peak range. | ||
| 964 | AH2 peakC=AH2(1.0,-1.0*4.0); | ||
| 965 | // Limiters, these need to be high precision RCPs. | ||
| 966 | AH2 hitMinR=mn4R*ARcpH2(AH2_(4.0)*mx4R); | ||
| 967 | AH2 hitMinG=mn4G*ARcpH2(AH2_(4.0)*mx4G); | ||
| 968 | AH2 hitMinB=mn4B*ARcpH2(AH2_(4.0)*mx4B); | ||
| 969 | AH2 hitMaxR=(peakC.x-mx4R)*ARcpH2(AH2_(4.0)*mn4R+peakC.y); | ||
| 970 | AH2 hitMaxG=(peakC.x-mx4G)*ARcpH2(AH2_(4.0)*mn4G+peakC.y); | ||
| 971 | AH2 hitMaxB=(peakC.x-mx4B)*ARcpH2(AH2_(4.0)*mn4B+peakC.y); | ||
| 972 | AH2 lobeR=max(-hitMinR,hitMaxR); | ||
| 973 | AH2 lobeG=max(-hitMinG,hitMaxG); | ||
| 974 | AH2 lobeB=max(-hitMinB,hitMaxB); | ||
| 975 | AH2 lobe=max(AH2_(-FSR_RCAS_LIMIT),min(AMax3H2(lobeR,lobeG,lobeB),AH2_(0.0)))*AH2_(AH2_AU1(con.y).x); | ||
| 976 | // Apply noise removal (only when FSR_RCAS_DENOISE is defined). | ||
| 977 | #ifdef FSR_RCAS_DENOISE | ||
| 978 | lobe*=nz; | ||
| 979 | #endif | ||
| 980 | // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. | ||
| 981 | AH2 rcpL=APrxMedRcpH2(AH2_(4.0)*lobe+AH2_(1.0)); | ||
| 982 | pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; | ||
| 983 | pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; | ||
| 984 | pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;} | ||
| 985 | #endif | ||
| 986 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 987 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 988 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 989 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 990 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 991 | //============================================================================================================================== | ||
| 992 | // | ||
| 993 | // FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR | ||
| 994 | // | ||
| 995 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 996 | // Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts. | ||
| 997 | // Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel. | ||
| 998 | // The 'Lfga*()' functions provide a convenient way to introduce grain. | ||
| 999 | // These functions limit grain based on distance to signal limits. | ||
| 1000 | // This is done so that the grain is temporally energy preserving, and thus won't modify image tonality. | ||
| 1001 | // Grain application should be done in a linear colorspace. | ||
| 1002 | // The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased). | ||
| 1003 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1004 | // Usage, | ||
| 1005 | // FsrLfga*( | ||
| 1006 | // color, // In/out linear colorspace color {0 to 1} ranged. | ||
| 1007 | // grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain. | ||
| 1008 | // amount); // Amount of grain {0 to 1} ranged. | ||
| 1009 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1010 | // Example if grain texture is monochrome: 'FsrLfgaF(color,AF3_(grain),amount)' | ||
| 1011 | //============================================================================================================================== | ||
| 1012 | #if defined(A_GPU) | ||
| 1013 | // Maximum grain is the minimum distance to the signal limit. | ||
| 1014 | void FsrLfgaF(inout AF3 c,AF3 t,AF1 a){c+=(t*AF3_(a))*min(AF3_(1.0)-c,c);} | ||
| 1015 | #endif | ||
| 1016 | //============================================================================================================================== | ||
| 1017 | #if defined(A_GPU)&&defined(A_HALF) | ||
| 1018 | // Half precision version (slower). | ||
| 1019 | void FsrLfgaH(inout AH3 c,AH3 t,AH1 a){c+=(t*AH3_(a))*min(AH3_(1.0)-c,c);} | ||
| 1020 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1021 | // Packed half precision version (faster). | ||
| 1022 | void FsrLfgaHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 tR,AH2 tG,AH2 tB,AH1 a){ | ||
| 1023 | cR+=(tR*AH2_(a))*min(AH2_(1.0)-cR,cR);cG+=(tG*AH2_(a))*min(AH2_(1.0)-cG,cG);cB+=(tB*AH2_(a))*min(AH2_(1.0)-cB,cB);} | ||
| 1024 | #endif | ||
| 1025 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1026 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1027 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1028 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1029 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1030 | //============================================================================================================================== | ||
| 1031 | // | ||
| 1032 | // FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER | ||
| 1033 | // | ||
| 1034 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1035 | // This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear. | ||
| 1036 | // The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering. | ||
| 1037 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1038 | // Reversible tonemapper usage, | ||
| 1039 | // FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}. | ||
| 1040 | // FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}. | ||
| 1041 | //============================================================================================================================== | ||
| 1042 | #if defined(A_GPU) | ||
| 1043 | void FsrSrtmF(inout AF3 c){c*=AF3_(ARcpF1(AMax3F1(c.r,c.g,c.b)+AF1_(1.0)));} | ||
| 1044 | // The extra max solves the c=1.0 case (which is a /0). | ||
| 1045 | void FsrSrtmInvF(inout AF3 c){c*=AF3_(ARcpF1(max(AF1_(1.0/32768.0),AF1_(1.0)-AMax3F1(c.r,c.g,c.b))));} | ||
| 1046 | #endif | ||
| 1047 | //============================================================================================================================== | ||
| 1048 | #if defined(A_GPU)&&defined(A_HALF) | ||
| 1049 | void FsrSrtmH(inout AH3 c){c*=AH3_(ARcpH1(AMax3H1(c.r,c.g,c.b)+AH1_(1.0)));} | ||
| 1050 | void FsrSrtmInvH(inout AH3 c){c*=AH3_(ARcpH1(max(AH1_(1.0/32768.0),AH1_(1.0)-AMax3H1(c.r,c.g,c.b))));} | ||
| 1051 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1052 | void FsrSrtmHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ | ||
| 1053 | AH2 rcp=ARcpH2(AMax3H2(cR,cG,cB)+AH2_(1.0));cR*=rcp;cG*=rcp;cB*=rcp;} | ||
| 1054 | void FsrSrtmInvHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ | ||
| 1055 | AH2 rcp=ARcpH2(max(AH2_(1.0/32768.0),AH2_(1.0)-AMax3H2(cR,cG,cB)));cR*=rcp;cG*=rcp;cB*=rcp;} | ||
| 1056 | #endif | ||
| 1057 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1058 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1059 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1060 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 1061 | //_____________________________________________________________/\_______________________________________________________________ | ||
| 1062 | //============================================================================================================================== | ||
| 1063 | // | ||
| 1064 | // FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER | ||
| 1065 | // | ||
| 1066 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1067 | // Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. | ||
| 1068 | // Gamma 2.0 is used so that the conversion back to linear is just to square the color. | ||
| 1069 | // The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively. | ||
| 1070 | // Given good non-biased temporal blue noise as dither input, | ||
| 1071 | // the output dither will temporally conserve energy. | ||
| 1072 | // This is done by choosing the linear nearest step point instead of perceptual nearest. | ||
| 1073 | // See code below for details. | ||
| 1074 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1075 | // DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION | ||
| 1076 | // =============================================== | ||
| 1077 | // - Output is 'uint(floor(saturate(n)*255.0+0.5))'. | ||
| 1078 | // - Thus rounding is to nearest. | ||
| 1079 | // - NaN gets converted to zero. | ||
| 1080 | // - INF is clamped to {0.0 to 1.0}. | ||
| 1081 | //============================================================================================================================== | ||
| 1082 | #if defined(A_GPU) | ||
| 1083 | // Hand tuned integer position to dither value, with more values than simple checkerboard. | ||
| 1084 | // Only 32-bit has enough precision for this computation. | ||
| 1085 | // Output is {0 to <1}. | ||
| 1086 | AF1 FsrTepdDitF(AU2 p,AU1 f){ | ||
| 1087 | AF1 x=AF1_(p.x+f); | ||
| 1088 | AF1 y=AF1_(p.y); | ||
| 1089 | // The 1.61803 golden ratio. | ||
| 1090 | AF1 a=AF1_((1.0+sqrt(5.0))/2.0); | ||
| 1091 | // Number designed to provide a good visual pattern. | ||
| 1092 | AF1 b=AF1_(1.0/3.69); | ||
| 1093 | x=x*a+(y*b); | ||
| 1094 | return AFractF1(x);} | ||
| 1095 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1096 | // This version is 8-bit gamma 2.0. | ||
| 1097 | // The 'c' input is {0 to 1}. | ||
| 1098 | // Output is {0 to 1} ready for image store. | ||
| 1099 | void FsrTepdC8F(inout AF3 c,AF1 dit){ | ||
| 1100 | AF3 n=sqrt(c); | ||
| 1101 | n=floor(n*AF3_(255.0))*AF3_(1.0/255.0); | ||
| 1102 | AF3 a=n*n; | ||
| 1103 | AF3 b=n+AF3_(1.0/255.0);b=b*b; | ||
| 1104 | // Ratio of 'a' to 'b' required to produce 'c'. | ||
| 1105 | // APrxLoRcpF1() won't work here (at least for very high dynamic ranges). | ||
| 1106 | // APrxMedRcpF1() is an IADD,FMA,MUL. | ||
| 1107 | AF3 r=(c-b)*APrxMedRcpF3(a-b); | ||
| 1108 | // Use the ratio as a cutoff to choose 'a' or 'b'. | ||
| 1109 | // AGtZeroF1() is a MUL. | ||
| 1110 | c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/255.0));} | ||
| 1111 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1112 | // This version is 10-bit gamma 2.0. | ||
| 1113 | // The 'c' input is {0 to 1}. | ||
| 1114 | // Output is {0 to 1} ready for image store. | ||
| 1115 | void FsrTepdC10F(inout AF3 c,AF1 dit){ | ||
| 1116 | AF3 n=sqrt(c); | ||
| 1117 | n=floor(n*AF3_(1023.0))*AF3_(1.0/1023.0); | ||
| 1118 | AF3 a=n*n; | ||
| 1119 | AF3 b=n+AF3_(1.0/1023.0);b=b*b; | ||
| 1120 | AF3 r=(c-b)*APrxMedRcpF3(a-b); | ||
| 1121 | c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/1023.0));} | ||
| 1122 | #endif | ||
| 1123 | //============================================================================================================================== | ||
| 1124 | #if defined(A_GPU)&&defined(A_HALF) | ||
| 1125 | AH1 FsrTepdDitH(AU2 p,AU1 f){ | ||
| 1126 | AF1 x=AF1_(p.x+f); | ||
| 1127 | AF1 y=AF1_(p.y); | ||
| 1128 | AF1 a=AF1_((1.0+sqrt(5.0))/2.0); | ||
| 1129 | AF1 b=AF1_(1.0/3.69); | ||
| 1130 | x=x*a+(y*b); | ||
| 1131 | return AH1(AFractF1(x));} | ||
| 1132 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1133 | void FsrTepdC8H(inout AH3 c,AH1 dit){ | ||
| 1134 | AH3 n=sqrt(c); | ||
| 1135 | n=floor(n*AH3_(255.0))*AH3_(1.0/255.0); | ||
| 1136 | AH3 a=n*n; | ||
| 1137 | AH3 b=n+AH3_(1.0/255.0);b=b*b; | ||
| 1138 | AH3 r=(c-b)*APrxMedRcpH3(a-b); | ||
| 1139 | c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/255.0));} | ||
| 1140 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1141 | void FsrTepdC10H(inout AH3 c,AH1 dit){ | ||
| 1142 | AH3 n=sqrt(c); | ||
| 1143 | n=floor(n*AH3_(1023.0))*AH3_(1.0/1023.0); | ||
| 1144 | AH3 a=n*n; | ||
| 1145 | AH3 b=n+AH3_(1.0/1023.0);b=b*b; | ||
| 1146 | AH3 r=(c-b)*APrxMedRcpH3(a-b); | ||
| 1147 | c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/1023.0));} | ||
| 1148 | //============================================================================================================================== | ||
| 1149 | // This computes dither for positions 'p' and 'p+{8,0}'. | ||
| 1150 | AH2 FsrTepdDitHx2(AU2 p,AU1 f){ | ||
| 1151 | AF2 x; | ||
| 1152 | x.x=AF1_(p.x+f); | ||
| 1153 | x.y=x.x+AF1_(8.0); | ||
| 1154 | AF1 y=AF1_(p.y); | ||
| 1155 | AF1 a=AF1_((1.0+sqrt(5.0))/2.0); | ||
| 1156 | AF1 b=AF1_(1.0/3.69); | ||
| 1157 | x=x*AF2_(a)+AF2_(y*b); | ||
| 1158 | return AH2(AFractF2(x));} | ||
| 1159 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1160 | void FsrTepdC8Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ | ||
| 1161 | AH2 nR=sqrt(cR); | ||
| 1162 | AH2 nG=sqrt(cG); | ||
| 1163 | AH2 nB=sqrt(cB); | ||
| 1164 | nR=floor(nR*AH2_(255.0))*AH2_(1.0/255.0); | ||
| 1165 | nG=floor(nG*AH2_(255.0))*AH2_(1.0/255.0); | ||
| 1166 | nB=floor(nB*AH2_(255.0))*AH2_(1.0/255.0); | ||
| 1167 | AH2 aR=nR*nR; | ||
| 1168 | AH2 aG=nG*nG; | ||
| 1169 | AH2 aB=nB*nB; | ||
| 1170 | AH2 bR=nR+AH2_(1.0/255.0);bR=bR*bR; | ||
| 1171 | AH2 bG=nG+AH2_(1.0/255.0);bG=bG*bG; | ||
| 1172 | AH2 bB=nB+AH2_(1.0/255.0);bB=bB*bB; | ||
| 1173 | AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); | ||
| 1174 | AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG); | ||
| 1175 | AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB); | ||
| 1176 | cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/255.0)); | ||
| 1177 | cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/255.0)); | ||
| 1178 | cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/255.0));} | ||
| 1179 | //------------------------------------------------------------------------------------------------------------------------------ | ||
| 1180 | void FsrTepdC10Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ | ||
| 1181 | AH2 nR=sqrt(cR); | ||
| 1182 | AH2 nG=sqrt(cG); | ||
| 1183 | AH2 nB=sqrt(cB); | ||
| 1184 | nR=floor(nR*AH2_(1023.0))*AH2_(1.0/1023.0); | ||
| 1185 | nG=floor(nG*AH2_(1023.0))*AH2_(1.0/1023.0); | ||
| 1186 | nB=floor(nB*AH2_(1023.0))*AH2_(1.0/1023.0); | ||
| 1187 | AH2 aR=nR*nR; | ||
| 1188 | AH2 aG=nG*nG; | ||
| 1189 | AH2 aB=nB*nB; | ||
| 1190 | AH2 bR=nR+AH2_(1.0/1023.0);bR=bR*bR; | ||
| 1191 | AH2 bG=nG+AH2_(1.0/1023.0);bG=bG*bG; | ||
| 1192 | AH2 bB=nB+AH2_(1.0/1023.0);bB=bB*bB; | ||
| 1193 | AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); | ||
| 1194 | AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG); | ||
| 1195 | AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB); | ||
| 1196 | cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/1023.0)); | ||
| 1197 | cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/1023.0)); | ||
| 1198 | cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/1023.0));} | ||
| 1199 | #endif | ||
diff --git a/externals/FidelityFX-FSR/license.txt b/externals/FidelityFX-FSR/license.txt new file mode 100644 index 000000000..324cba594 --- /dev/null +++ b/externals/FidelityFX-FSR/license.txt | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. | ||
| 2 | |||
| 3 | Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| 4 | of this software and associated documentation files (the "Software"), to deal | ||
| 5 | in the Software without restriction, including without limitation the rights | ||
| 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| 7 | copies of the Software, and to permit persons to whom the Software is | ||
| 8 | furnished to do so, subject to the following conditions: | ||
| 9 | |||
| 10 | The above copyright notice and this permission notice shall be included in | ||
| 11 | all copies or substantial portions of the Software. | ||
| 12 | |||
| 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| 19 | THE SOFTWARE. | ||
diff --git a/src/common/math_util.h b/src/common/math_util.h index 4c38d8040..510c4e56d 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h | |||
| @@ -48,8 +48,8 @@ struct Rectangle { | |||
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | [[nodiscard]] Rectangle<T> Scale(const float s) const { | 50 | [[nodiscard]] Rectangle<T> Scale(const float s) const { |
| 51 | return Rectangle{left, top, static_cast<T>(left + GetWidth() * s), | 51 | return Rectangle{left, top, static_cast<T>(static_cast<float>(left + GetWidth()) * s), |
| 52 | static_cast<T>(top + GetHeight() * s)}; | 52 | static_cast<T>(static_cast<float>(top + GetHeight()) * s)}; |
| 53 | } | 53 | } |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 9dd5e3efb..3bcaa072f 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp | |||
| @@ -47,7 +47,9 @@ void LogSettings() { | |||
| 47 | log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue()); | 47 | log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue()); |
| 48 | log_setting("Core_UseMultiCore", values.use_multi_core.GetValue()); | 48 | log_setting("Core_UseMultiCore", values.use_multi_core.GetValue()); |
| 49 | log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue()); | 49 | log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue()); |
| 50 | log_setting("Renderer_UseResolutionFactor", values.resolution_factor.GetValue()); | 50 | log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue()); |
| 51 | log_setting("Renderer_ScalingFilter", values.scaling_filter.GetValue()); | ||
| 52 | log_setting("Renderer_AntiAliasing", values.anti_aliasing.GetValue()); | ||
| 51 | log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue()); | 53 | log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue()); |
| 52 | log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue()); | 54 | log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue()); |
| 53 | log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue()); | 55 | log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue()); |
| @@ -105,6 +107,55 @@ float Volume() { | |||
| 105 | return values.volume.GetValue() / 100.0f; | 107 | return values.volume.GetValue() / 100.0f; |
| 106 | } | 108 | } |
| 107 | 109 | ||
| 110 | void UpdateRescalingInfo() { | ||
| 111 | const auto setup = values.resolution_setup.GetValue(); | ||
| 112 | auto& info = values.resolution_info; | ||
| 113 | info.downscale = false; | ||
| 114 | switch (setup) { | ||
| 115 | case ResolutionSetup::Res1_2X: | ||
| 116 | info.up_scale = 1; | ||
| 117 | info.down_shift = 1; | ||
| 118 | info.downscale = true; | ||
| 119 | break; | ||
| 120 | case ResolutionSetup::Res3_4X: | ||
| 121 | info.up_scale = 3; | ||
| 122 | info.down_shift = 2; | ||
| 123 | info.downscale = true; | ||
| 124 | break; | ||
| 125 | case ResolutionSetup::Res1X: | ||
| 126 | info.up_scale = 1; | ||
| 127 | info.down_shift = 0; | ||
| 128 | break; | ||
| 129 | case ResolutionSetup::Res2X: | ||
| 130 | info.up_scale = 2; | ||
| 131 | info.down_shift = 0; | ||
| 132 | break; | ||
| 133 | case ResolutionSetup::Res3X: | ||
| 134 | info.up_scale = 3; | ||
| 135 | info.down_shift = 0; | ||
| 136 | break; | ||
| 137 | case ResolutionSetup::Res4X: | ||
| 138 | info.up_scale = 4; | ||
| 139 | info.down_shift = 0; | ||
| 140 | break; | ||
| 141 | case ResolutionSetup::Res5X: | ||
| 142 | info.up_scale = 5; | ||
| 143 | info.down_shift = 0; | ||
| 144 | break; | ||
| 145 | case ResolutionSetup::Res6X: | ||
| 146 | info.up_scale = 6; | ||
| 147 | info.down_shift = 0; | ||
| 148 | break; | ||
| 149 | default: | ||
| 150 | UNREACHABLE(); | ||
| 151 | info.up_scale = 1; | ||
| 152 | info.down_shift = 0; | ||
| 153 | } | ||
| 154 | info.up_factor = static_cast<f32>(info.up_scale) / (1U << info.down_shift); | ||
| 155 | info.down_factor = static_cast<f32>(1U << info.down_shift) / info.up_scale; | ||
| 156 | info.active = info.up_scale != 1 || info.down_shift != 0; | ||
| 157 | } | ||
| 158 | |||
| 108 | void RestoreGlobalState(bool is_powered_on) { | 159 | void RestoreGlobalState(bool is_powered_on) { |
| 109 | // If a game is running, DO NOT restore the global settings state | 160 | // If a game is running, DO NOT restore the global settings state |
| 110 | if (is_powered_on) { | 161 | if (is_powered_on) { |
diff --git a/src/common/settings.h b/src/common/settings.h index 9ff4cf85d..42f8b4a7d 100644 --- a/src/common/settings.h +++ b/src/common/settings.h | |||
| @@ -52,6 +52,56 @@ enum class NvdecEmulation : u32 { | |||
| 52 | GPU = 2, | 52 | GPU = 2, |
| 53 | }; | 53 | }; |
| 54 | 54 | ||
| 55 | enum class ResolutionSetup : u32 { | ||
| 56 | Res1_2X = 0, | ||
| 57 | Res3_4X = 1, | ||
| 58 | Res1X = 2, | ||
| 59 | Res2X = 3, | ||
| 60 | Res3X = 4, | ||
| 61 | Res4X = 5, | ||
| 62 | Res5X = 6, | ||
| 63 | Res6X = 7, | ||
| 64 | }; | ||
| 65 | |||
| 66 | enum class ScalingFilter : u32 { | ||
| 67 | NearestNeighbor = 0, | ||
| 68 | Bilinear = 1, | ||
| 69 | Bicubic = 2, | ||
| 70 | Gaussian = 3, | ||
| 71 | ScaleForce = 4, | ||
| 72 | Fsr = 5, | ||
| 73 | LastFilter = Fsr, | ||
| 74 | }; | ||
| 75 | |||
| 76 | enum class AntiAliasing : u32 { | ||
| 77 | None = 0, | ||
| 78 | Fxaa = 1, | ||
| 79 | LastAA = Fxaa, | ||
| 80 | }; | ||
| 81 | |||
| 82 | struct ResolutionScalingInfo { | ||
| 83 | u32 up_scale{1}; | ||
| 84 | u32 down_shift{0}; | ||
| 85 | f32 up_factor{1.0f}; | ||
| 86 | f32 down_factor{1.0f}; | ||
| 87 | bool active{}; | ||
| 88 | bool downscale{}; | ||
| 89 | |||
| 90 | s32 ScaleUp(s32 value) const { | ||
| 91 | if (value == 0) { | ||
| 92 | return 0; | ||
| 93 | } | ||
| 94 | return std::max((value * static_cast<s32>(up_scale)) >> static_cast<s32>(down_shift), 1); | ||
| 95 | } | ||
| 96 | |||
| 97 | u32 ScaleUp(u32 value) const { | ||
| 98 | if (value == 0U) { | ||
| 99 | return 0U; | ||
| 100 | } | ||
| 101 | return std::max((value * up_scale) >> down_shift, 1U); | ||
| 102 | } | ||
| 103 | }; | ||
| 104 | |||
| 55 | /** The BasicSetting class is a simple resource manager. It defines a label and default value | 105 | /** The BasicSetting class is a simple resource manager. It defines a label and default value |
| 56 | * alongside the actual value of the setting for simpler and less-error prone use with frontend | 106 | * alongside the actual value of the setting for simpler and less-error prone use with frontend |
| 57 | * configurations. Setting a default value and label is required, though subclasses may deviate from | 107 | * configurations. Setting a default value and label is required, though subclasses may deviate from |
| @@ -451,7 +501,10 @@ struct Values { | |||
| 451 | "disable_shader_loop_safety_checks"}; | 501 | "disable_shader_loop_safety_checks"}; |
| 452 | Setting<int> vulkan_device{0, "vulkan_device"}; | 502 | Setting<int> vulkan_device{0, "vulkan_device"}; |
| 453 | 503 | ||
| 454 | Setting<u16> resolution_factor{1, "resolution_factor"}; | 504 | ResolutionScalingInfo resolution_info{}; |
| 505 | Setting<ResolutionSetup> resolution_setup{ResolutionSetup::Res1X, "resolution_setup"}; | ||
| 506 | Setting<ScalingFilter> scaling_filter{ScalingFilter::Bilinear, "scaling_filter"}; | ||
| 507 | Setting<AntiAliasing> anti_aliasing{AntiAliasing::None, "anti_aliasing"}; | ||
| 455 | // *nix platforms may have issues with the borderless windowed fullscreen mode. | 508 | // *nix platforms may have issues with the borderless windowed fullscreen mode. |
| 456 | // Default to exclusive fullscreen on these platforms for now. | 509 | // Default to exclusive fullscreen on these platforms for now. |
| 457 | RangedSetting<FullscreenMode> fullscreen_mode{ | 510 | RangedSetting<FullscreenMode> fullscreen_mode{ |
| @@ -462,7 +515,7 @@ struct Values { | |||
| 462 | #endif | 515 | #endif |
| 463 | FullscreenMode::Borderless, FullscreenMode::Exclusive, "fullscreen_mode"}; | 516 | FullscreenMode::Borderless, FullscreenMode::Exclusive, "fullscreen_mode"}; |
| 464 | RangedSetting<int> aspect_ratio{0, 0, 3, "aspect_ratio"}; | 517 | RangedSetting<int> aspect_ratio{0, 0, 3, "aspect_ratio"}; |
| 465 | RangedSetting<int> max_anisotropy{0, 0, 4, "max_anisotropy"}; | 518 | RangedSetting<int> max_anisotropy{0, 0, 5, "max_anisotropy"}; |
| 466 | Setting<bool> use_speed_limit{true, "use_speed_limit"}; | 519 | Setting<bool> use_speed_limit{true, "use_speed_limit"}; |
| 467 | RangedSetting<u16> speed_limit{100, 0, 9999, "speed_limit"}; | 520 | RangedSetting<u16> speed_limit{100, 0, 9999, "speed_limit"}; |
| 468 | Setting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"}; | 521 | Setting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"}; |
| @@ -595,6 +648,8 @@ std::string GetTimeZoneString(); | |||
| 595 | 648 | ||
| 596 | void LogSettings(); | 649 | void LogSettings(); |
| 597 | 650 | ||
| 651 | void UpdateRescalingInfo(); | ||
| 652 | |||
| 598 | // Restore the global state of all applicable settings in the Values struct | 653 | // Restore the global state of all applicable settings in the Values struct |
| 599 | void RestoreGlobalState(bool is_powered_on); | 654 | void RestoreGlobalState(bool is_powered_on); |
| 600 | 655 | ||
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index 0832463d6..4b58b672a 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp | |||
| @@ -44,16 +44,13 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) { | |||
| 44 | return res; | 44 | return res; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) { | 47 | FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale) { |
| 48 | u32 width, height; | 48 | const bool is_docked = Settings::values.use_docked_mode.GetValue(); |
| 49 | const u32 screen_width = is_docked ? ScreenDocked::Width : ScreenUndocked::Width; | ||
| 50 | const u32 screen_height = is_docked ? ScreenDocked::Height : ScreenUndocked::Height; | ||
| 49 | 51 | ||
| 50 | if (Settings::values.use_docked_mode.GetValue()) { | 52 | const u32 width = static_cast<u32>(static_cast<f32>(screen_width) * res_scale); |
| 51 | width = ScreenDocked::Width * res_scale; | 53 | const u32 height = static_cast<u32>(static_cast<f32>(screen_height) * res_scale); |
| 52 | height = ScreenDocked::Height * res_scale; | ||
| 53 | } else { | ||
| 54 | width = ScreenUndocked::Width * res_scale; | ||
| 55 | height = ScreenUndocked::Height * res_scale; | ||
| 56 | } | ||
| 57 | 54 | ||
| 58 | return DefaultFrameLayout(width, height); | 55 | return DefaultFrameLayout(width, height); |
| 59 | } | 56 | } |
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index e2e3bbbb3..2e36c0163 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h | |||
| @@ -60,7 +60,7 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height); | |||
| 60 | * Convenience method to get frame layout by resolution scale | 60 | * Convenience method to get frame layout by resolution scale |
| 61 | * @param res_scale resolution scale factor | 61 | * @param res_scale resolution scale factor |
| 62 | */ | 62 | */ |
| 63 | FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale); | 63 | FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale); |
| 64 | 64 | ||
| 65 | /** | 65 | /** |
| 66 | * Convenience method to determine emulation aspect ratio | 66 | * Convenience method to determine emulation aspect ratio |
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 50c2ace93..aee8d4f93 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp | |||
| @@ -797,15 +797,11 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext& | |||
| 797 | rb.Push(ResultSuccess); | 797 | rb.Push(ResultSuccess); |
| 798 | 798 | ||
| 799 | if (Settings::values.use_docked_mode.GetValue()) { | 799 | if (Settings::values.use_docked_mode.GetValue()) { |
| 800 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) * | 800 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth)); |
| 801 | static_cast<u32>(Settings::values.resolution_factor.GetValue())); | 801 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight)); |
| 802 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) * | ||
| 803 | static_cast<u32>(Settings::values.resolution_factor.GetValue())); | ||
| 804 | } else { | 802 | } else { |
| 805 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) * | 803 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth)); |
| 806 | static_cast<u32>(Settings::values.resolution_factor.GetValue())); | 804 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight)); |
| 807 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) * | ||
| 808 | static_cast<u32>(Settings::values.resolution_factor.GetValue())); | ||
| 809 | } | 805 | } |
| 810 | } | 806 | } |
| 811 | 807 | ||
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 63d5242c4..75ee3e5e4 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp | |||
| @@ -541,11 +541,8 @@ private: | |||
| 541 | switch (transaction) { | 541 | switch (transaction) { |
| 542 | case TransactionId::Connect: { | 542 | case TransactionId::Connect: { |
| 543 | IGBPConnectRequestParcel request{ctx.ReadBuffer()}; | 543 | IGBPConnectRequestParcel request{ctx.ReadBuffer()}; |
| 544 | IGBPConnectResponseParcel response{ | 544 | IGBPConnectResponseParcel response{static_cast<u32>(DisplayResolution::UndockedWidth), |
| 545 | static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) * | 545 | static_cast<u32>(DisplayResolution::UndockedHeight)}; |
| 546 | Settings::values.resolution_factor.GetValue()), | ||
| 547 | static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * | ||
| 548 | Settings::values.resolution_factor.GetValue())}; | ||
| 549 | 546 | ||
| 550 | buffer_queue.Connect(); | 547 | buffer_queue.Connect(); |
| 551 | 548 | ||
| @@ -775,15 +772,11 @@ private: | |||
| 775 | rb.Push(ResultSuccess); | 772 | rb.Push(ResultSuccess); |
| 776 | 773 | ||
| 777 | if (Settings::values.use_docked_mode.GetValue()) { | 774 | if (Settings::values.use_docked_mode.GetValue()) { |
| 778 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) * | 775 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth)); |
| 779 | static_cast<u32>(Settings::values.resolution_factor.GetValue())); | 776 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight)); |
| 780 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) * | ||
| 781 | static_cast<u32>(Settings::values.resolution_factor.GetValue())); | ||
| 782 | } else { | 777 | } else { |
| 783 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) * | 778 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth)); |
| 784 | static_cast<u32>(Settings::values.resolution_factor.GetValue())); | 779 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight)); |
| 785 | rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) * | ||
| 786 | static_cast<u32>(Settings::values.resolution_factor.GetValue())); | ||
| 787 | } | 780 | } |
| 788 | 781 | ||
| 789 | rb.PushRaw<float>(60.0f); // This wouldn't seem to be correct for 30 fps games. | 782 | rb.PushRaw<float>(60.0f); // This wouldn't seem to be correct for 30 fps games. |
| @@ -1063,10 +1056,8 @@ private: | |||
| 1063 | // This only returns the fixed values of 1280x720 and makes no distinguishing | 1056 | // This only returns the fixed values of 1280x720 and makes no distinguishing |
| 1064 | // between docked and undocked dimensions. We take the liberty of applying | 1057 | // between docked and undocked dimensions. The resolution scaling factor is |
| 1065 | // the resolution scaling factor here. | 1058 | // intentionally not applied here. |
| 1066 | rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth) * | 1059 | rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth)); |
| 1067 | static_cast<u32>(Settings::values.resolution_factor.GetValue())); | 1060 | rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight)); |
| 1068 | rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight) * | ||
| 1069 | static_cast<u32>(Settings::values.resolution_factor.GetValue())); | ||
| 1070 | } | 1061 | } |
| 1071 | 1062 | ||
| 1072 | void SetLayerScalingMode(Kernel::HLERequestContext& ctx) { | 1063 | void SetLayerScalingMode(Kernel::HLERequestContext& ctx) { |
| @@ -1099,8 +1090,6 @@ private: | |||
| 1099 | LOG_WARNING(Service_VI, "(STUBBED) called"); | 1090 | LOG_WARNING(Service_VI, "(STUBBED) called"); |
| 1100 | 1091 | ||
| 1101 | DisplayInfo display_info; | 1092 | DisplayInfo display_info; |
| 1102 | display_info.width *= static_cast<u64>(Settings::values.resolution_factor.GetValue()); | ||
| 1103 | display_info.height *= static_cast<u64>(Settings::values.resolution_factor.GetValue()); | ||
| 1104 | ctx.WriteBuffer(&display_info, sizeof(DisplayInfo)); | 1093 | ctx.WriteBuffer(&display_info, sizeof(DisplayInfo)); |
| 1105 | IPC::ResponseBuilder rb{ctx, 4}; | 1094 | IPC::ResponseBuilder rb{ctx, 4}; |
| 1106 | rb.Push(ResultSuccess); | 1095 | rb.Push(ResultSuccess); |
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 191475f71..654db0b52 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -229,8 +229,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, | |||
| 229 | AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core.GetValue()); | 229 | AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core.GetValue()); |
| 230 | AddField(field_type, "Renderer_Backend", | 230 | AddField(field_type, "Renderer_Backend", |
| 231 | TranslateRenderer(Settings::values.renderer_backend.GetValue())); | 231 | TranslateRenderer(Settings::values.renderer_backend.GetValue())); |
| 232 | AddField(field_type, "Renderer_ResolutionFactor", | ||
| 233 | Settings::values.resolution_factor.GetValue()); | ||
| 234 | AddField(field_type, "Renderer_UseSpeedLimit", Settings::values.use_speed_limit.GetValue()); | 232 | AddField(field_type, "Renderer_UseSpeedLimit", Settings::values.use_speed_limit.GetValue()); |
| 235 | AddField(field_type, "Renderer_SpeedLimit", Settings::values.speed_limit.GetValue()); | 233 | AddField(field_type, "Renderer_SpeedLimit", Settings::values.speed_limit.GetValue()); |
| 236 | AddField(field_type, "Renderer_UseDiskShaderCache", | 234 | AddField(field_type, "Renderer_UseDiskShaderCache", |
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b5b7e5e83..bc3df80c8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -221,6 +221,7 @@ add_library(shader_recompiler STATIC | |||
| 221 | ir_opt/lower_fp16_to_fp32.cpp | 221 | ir_opt/lower_fp16_to_fp32.cpp |
| 222 | ir_opt/lower_int64_to_int32.cpp | 222 | ir_opt/lower_int64_to_int32.cpp |
| 223 | ir_opt/passes.h | 223 | ir_opt/passes.h |
| 224 | ir_opt/rescaling_pass.cpp | ||
| 224 | ir_opt/ssa_rewrite_pass.cpp | 225 | ir_opt/ssa_rewrite_pass.cpp |
| 225 | ir_opt/texture_pass.cpp | 226 | ir_opt/texture_pass.cpp |
| 226 | ir_opt/verification_pass.cpp | 227 | ir_opt/verification_pass.cpp |
diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h index 35503000c..669702553 100644 --- a/src/shader_recompiler/backend/bindings.h +++ b/src/shader_recompiler/backend/bindings.h | |||
| @@ -14,6 +14,8 @@ struct Bindings { | |||
| 14 | u32 storage_buffer{}; | 14 | u32 storage_buffer{}; |
| 15 | u32 texture{}; | 15 | u32 texture{}; |
| 16 | u32 image{}; | 16 | u32 image{}; |
| 17 | u32 texture_scaling_index{}; | ||
| 18 | u32 image_scaling_index{}; | ||
| 17 | }; | 19 | }; |
| 18 | 20 | ||
| 19 | } // namespace Shader::Backend | 21 | } // namespace Shader::Backend |
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 069c019ad..8fd459dfe 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include "shader_recompiler/backend/bindings.h" | 7 | #include "shader_recompiler/backend/bindings.h" |
| 8 | #include "shader_recompiler/backend/glasm/emit_context.h" | 8 | #include "shader_recompiler/backend/glasm/emit_context.h" |
| 9 | #include "shader_recompiler/backend/glasm/emit_glasm.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | 10 | #include "shader_recompiler/frontend/ir/program.h" |
| 10 | #include "shader_recompiler/profile.h" | 11 | #include "shader_recompiler/profile.h" |
| 11 | #include "shader_recompiler/runtime_info.h" | 12 | #include "shader_recompiler/runtime_info.h" |
| @@ -55,7 +56,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | |||
| 55 | } | 56 | } |
| 56 | if (!runtime_info.glasm_use_storage_buffers) { | 57 | if (!runtime_info.glasm_use_storage_buffers) { |
| 57 | if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { | 58 | if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { |
| 58 | Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); | 59 | const size_t index{num + PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE}; |
| 60 | Add("PARAM c[{}]={{program.local[0..{}]}};", index, index - 1); | ||
| 59 | } | 61 | } |
| 60 | } | 62 | } |
| 61 | stage = program.stage; | 63 | stage = program.stage; |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 4ce1c4f54..004658546 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp | |||
| @@ -448,6 +448,9 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I | |||
| 448 | header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size); | 448 | header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size); |
| 449 | header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};"); | 449 | header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};"); |
| 450 | } | 450 | } |
| 451 | if (program.info.uses_rescaling_uniform) { | ||
| 452 | header += "PARAM scaling[1]={program.local[0..0]};"; | ||
| 453 | } | ||
| 451 | header += "TEMP "; | 454 | header += "TEMP "; |
| 452 | for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { | 455 | for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { |
| 453 | header += fmt::format("R{},", index); | 456 | header += fmt::format("R{},", index); |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index bcb55f062..292655acb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h | |||
| @@ -13,6 +13,8 @@ | |||
| 13 | 13 | ||
| 14 | namespace Shader::Backend::GLASM { | 14 | namespace Shader::Backend::GLASM { |
| 15 | 15 | ||
| 16 | constexpr u32 PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE = 1; | ||
| 17 | |||
| 16 | [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, | 18 | [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, |
| 17 | IR::Program& program, Bindings& bindings); | 19 | IR::Program& program, Bindings& bindings); |
| 18 | 20 | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 09e3a9b82..d325d31c7 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | |||
| @@ -608,6 +608,24 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Re | |||
| 608 | ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); | 608 | ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); |
| 609 | } | 609 | } |
| 610 | 610 | ||
| 611 | void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { | ||
| 612 | if (!index.IsImmediate()) { | ||
| 613 | throw NotImplementedException("Non-constant texture rescaling"); | ||
| 614 | } | ||
| 615 | ctx.Add("AND.U RC.x,scaling[0].x,{};" | ||
| 616 | "SNE.S {},RC.x,0;", | ||
| 617 | 1u << index.U32(), ctx.reg_alloc.Define(inst)); | ||
| 618 | } | ||
| 619 | |||
| 620 | void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { | ||
| 621 | if (!index.IsImmediate()) { | ||
| 622 | throw NotImplementedException("Non-constant texture rescaling"); | ||
| 623 | } | ||
| 624 | ctx.Add("AND.U RC.x,scaling[0].y,{};" | ||
| 625 | "SNE.S {},RC.x,0;", | ||
| 626 | 1u << index.U32(), ctx.reg_alloc.Define(inst)); | ||
| 627 | } | ||
| 628 | |||
| 611 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | 629 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, |
| 612 | ScalarU32 value) { | 630 | ScalarU32 value) { |
| 613 | ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); | 631 | ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 12afda43b..1f343bff5 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | |||
| @@ -72,6 +72,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); | |||
| 72 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst); | 72 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst); |
| 73 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); | 73 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); |
| 74 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst); | 74 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst); |
| 75 | void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst); | ||
| 75 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); | 76 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); |
| 76 | void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); | 77 | void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); |
| 77 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); | 78 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); |
| @@ -303,6 +304,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | |||
| 303 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | 304 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); |
| 304 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | 305 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); |
| 305 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | 306 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); |
| 307 | void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 308 | void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); | ||
| 306 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | 309 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); |
| 307 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value); | 310 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value); |
| 308 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | 311 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); |
| @@ -553,6 +556,8 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 553 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); | 556 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); |
| 554 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | 557 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, |
| 555 | Register color); | 558 | Register color); |
| 559 | void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); | ||
| 560 | void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); | ||
| 556 | void EmitBindlessImageAtomicIAdd32(EmitContext&); | 561 | void EmitBindlessImageAtomicIAdd32(EmitContext&); |
| 557 | void EmitBindlessImageAtomicSMin32(EmitContext&); | 562 | void EmitBindlessImageAtomicSMin32(EmitContext&); |
| 558 | void EmitBindlessImageAtomicUMin32(EmitContext&); | 563 | void EmitBindlessImageAtomicUMin32(EmitContext&); |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index f55c26b76..8aa494a4d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp | |||
| @@ -90,6 +90,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | |||
| 90 | ctx.Add("MUL.S {}.x,{},{};", inst, a, b); | 90 | ctx.Add("MUL.S {}.x,{},{};", inst, a, b); |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 94 | ctx.Add("DIV.S {}.x,{},{};", inst, a, b); | ||
| 95 | } | ||
| 96 | |||
| 97 | void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { | ||
| 98 | ctx.Add("DIV.U {}.x,{},{};", inst, a, b); | ||
| 99 | } | ||
| 100 | |||
| 93 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | 101 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { |
| 94 | if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) { | 102 | if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) { |
| 95 | ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32)); | 103 | ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32)); |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index e537f6073..681aeda8d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp | |||
| @@ -210,6 +210,10 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { | |||
| 210 | ctx.Add("MOV.F {}.x,y_direction[0].w;", inst); | 210 | ctx.Add("MOV.F {}.x,y_direction[0].w;", inst); |
| 211 | } | 211 | } |
| 212 | 212 | ||
| 213 | void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { | ||
| 214 | ctx.Add("MOV.F {}.x,scaling[0].z;", inst); | ||
| 215 | } | ||
| 216 | |||
| 213 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { | 217 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { |
| 214 | ctx.Add("MOV.S {}.x,0;", inst); | 218 | ctx.Add("MOV.S {}.x,0;", inst); |
| 215 | } | 219 | } |
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 4e6f2c0fe..97bd59302 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp | |||
| @@ -393,6 +393,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | |||
| 393 | DefineGenericOutput(index, program.invocations); | 393 | DefineGenericOutput(index, program.invocations); |
| 394 | } | 394 | } |
| 395 | } | 395 | } |
| 396 | if (info.uses_rescaling_uniform) { | ||
| 397 | header += "layout(location=0) uniform vec4 scaling;"; | ||
| 398 | } | ||
| 396 | DefineConstantBuffers(bindings); | 399 | DefineConstantBuffers(bindings); |
| 397 | DefineStorageBuffers(bindings); | 400 | DefineStorageBuffers(bindings); |
| 398 | SetupImages(bindings); | 401 | SetupImages(bindings); |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 170db269a..4c26f3829 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | |||
| @@ -445,6 +445,10 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { | |||
| 445 | ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); | 445 | ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); |
| 446 | } | 446 | } |
| 447 | 447 | ||
| 448 | void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { | ||
| 449 | ctx.AddF32("{}=scaling.z;", inst); | ||
| 450 | } | ||
| 451 | |||
| 448 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { | 452 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { |
| 449 | ctx.AddU32("{}=lmem[{}];", inst, word_offset); | 453 | ctx.AddU32("{}=lmem[{}];", inst, word_offset); |
| 450 | } | 454 | } |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 447eb8e0a..2f78d0267 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | |||
| @@ -612,6 +612,22 @@ void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value | |||
| 612 | value); | 612 | value); |
| 613 | } | 613 | } |
| 614 | 614 | ||
| 615 | void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { | ||
| 616 | if (!index.IsImmediate()) { | ||
| 617 | throw NotImplementedException("Non-constant texture rescaling"); | ||
| 618 | } | ||
| 619 | const u32 image_index{index.U32()}; | ||
| 620 | ctx.AddU1("{}=(ftou(scaling.x)&{})!=0;", inst, 1u << image_index); | ||
| 621 | } | ||
| 622 | |||
| 623 | void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { | ||
| 624 | if (!index.IsImmediate()) { | ||
| 625 | throw NotImplementedException("Non-constant texture rescaling"); | ||
| 626 | } | ||
| 627 | const u32 image_index{index.U32()}; | ||
| 628 | ctx.AddU1("{}=(ftou(scaling.y)&{})!=0;", inst, 1u << image_index); | ||
| 629 | } | ||
| 630 | |||
| 615 | void EmitBindlessImageSampleImplicitLod(EmitContext&) { | 631 | void EmitBindlessImageSampleImplicitLod(EmitContext&) { |
| 616 | NotImplemented(); | 632 | NotImplemented(); |
| 617 | } | 633 | } |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 5936d086f..f86502e4c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h | |||
| @@ -85,6 +85,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); | |||
| 85 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst); | 85 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst); |
| 86 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); | 86 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); |
| 87 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst); | 87 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst); |
| 88 | void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst); | ||
| 88 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); | 89 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); |
| 89 | void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); | 90 | void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); |
| 90 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); | 91 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); |
| @@ -362,6 +363,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin | |||
| 362 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | 363 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); |
| 363 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | 364 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); |
| 364 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | 365 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); |
| 366 | void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 367 | void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 365 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | 368 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); |
| 366 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | 369 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); |
| 367 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | 370 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); |
| @@ -627,6 +630,8 @@ void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 627 | std::string_view coords); | 630 | std::string_view coords); |
| 628 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | 631 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, |
| 629 | std::string_view coords, std::string_view color); | 632 | std::string_view coords, std::string_view color); |
| 633 | void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); | ||
| 634 | void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); | ||
| 630 | void EmitBindlessImageAtomicIAdd32(EmitContext&); | 635 | void EmitBindlessImageAtomicIAdd32(EmitContext&); |
| 631 | void EmitBindlessImageAtomicSMin32(EmitContext&); | 636 | void EmitBindlessImageAtomicSMin32(EmitContext&); |
| 632 | void EmitBindlessImageAtomicUMin32(EmitContext&); | 637 | void EmitBindlessImageAtomicUMin32(EmitContext&); |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 38419f88f..88c1d4c5e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp | |||
| @@ -78,6 +78,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin | |||
| 78 | ctx.AddU32("{}=uint({}*{});", inst, a, b); | 78 | ctx.AddU32("{}=uint({}*{});", inst, a, b); |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 82 | ctx.AddU32("{}=uint(int({})/int({}));", inst, a, b); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 86 | ctx.AddU32("{}={}/{};", inst, a, b); | ||
| 87 | } | ||
| 88 | |||
| 81 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | 89 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { |
| 82 | ctx.AddU32("{}=uint(-({}));", inst, value); | 90 | ctx.AddU32("{}=uint(-({}));", inst, value); |
| 83 | } | 91 | } |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3c84e6466..723455462 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -7,11 +7,14 @@ | |||
| 7 | #include <climits> | 7 | #include <climits> |
| 8 | #include <string_view> | 8 | #include <string_view> |
| 9 | 9 | ||
| 10 | #include <boost/container/static_vector.hpp> | ||
| 11 | |||
| 10 | #include <fmt/format.h> | 12 | #include <fmt/format.h> |
| 11 | 13 | ||
| 12 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 13 | #include "common/div_ceil.h" | 15 | #include "common/div_ceil.h" |
| 14 | #include "shader_recompiler/backend/spirv/emit_context.h" | 16 | #include "shader_recompiler/backend/spirv/emit_context.h" |
| 17 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 15 | 18 | ||
| 16 | namespace Shader::Backend::SPIRV { | 19 | namespace Shader::Backend::SPIRV { |
| 17 | namespace { | 20 | namespace { |
| @@ -474,8 +477,9 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie | |||
| 474 | 477 | ||
| 475 | EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, | 478 | EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, |
| 476 | IR::Program& program, Bindings& bindings) | 479 | IR::Program& program, Bindings& bindings) |
| 477 | : Sirit::Module(profile_.supported_spirv), profile{profile_}, | 480 | : Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_}, |
| 478 | runtime_info{runtime_info_}, stage{program.stage} { | 481 | stage{program.stage}, texture_rescaling_index{bindings.texture_scaling_index}, |
| 482 | image_rescaling_index{bindings.image_scaling_index} { | ||
| 479 | const bool is_unified{profile.unified_descriptor_binding}; | 483 | const bool is_unified{profile.unified_descriptor_binding}; |
| 480 | u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; | 484 | u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; |
| 481 | u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; | 485 | u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; |
| @@ -492,10 +496,11 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf | |||
| 492 | DefineStorageBuffers(program.info, storage_binding); | 496 | DefineStorageBuffers(program.info, storage_binding); |
| 493 | DefineTextureBuffers(program.info, texture_binding); | 497 | DefineTextureBuffers(program.info, texture_binding); |
| 494 | DefineImageBuffers(program.info, image_binding); | 498 | DefineImageBuffers(program.info, image_binding); |
| 495 | DefineTextures(program.info, texture_binding); | 499 | DefineTextures(program.info, texture_binding, bindings.texture_scaling_index); |
| 496 | DefineImages(program.info, image_binding); | 500 | DefineImages(program.info, image_binding, bindings.image_scaling_index); |
| 497 | DefineAttributeMemAccess(program.info); | 501 | DefineAttributeMemAccess(program.info); |
| 498 | DefineGlobalMemoryFunctions(program.info); | 502 | DefineGlobalMemoryFunctions(program.info); |
| 503 | DefineRescalingInput(program.info); | ||
| 499 | } | 504 | } |
| 500 | 505 | ||
| 501 | EmitContext::~EmitContext() = default; | 506 | EmitContext::~EmitContext() = default; |
| @@ -996,6 +1001,73 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { | |||
| 996 | define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); | 1001 | define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); |
| 997 | } | 1002 | } |
| 998 | 1003 | ||
| 1004 | void EmitContext::DefineRescalingInput(const Info& info) { | ||
| 1005 | if (!info.uses_rescaling_uniform) { | ||
| 1006 | return; | ||
| 1007 | } | ||
| 1008 | if (profile.unified_descriptor_binding) { | ||
| 1009 | DefineRescalingInputPushConstant(); | ||
| 1010 | } else { | ||
| 1011 | DefineRescalingInputUniformConstant(); | ||
| 1012 | } | ||
| 1013 | } | ||
| 1014 | |||
| 1015 | void EmitContext::DefineRescalingInputPushConstant() { | ||
| 1016 | boost::container::static_vector<Id, 3> members{}; | ||
| 1017 | u32 member_index{0}; | ||
| 1018 | |||
| 1019 | rescaling_textures_type = TypeArray(U32[1], Const(4u)); | ||
| 1020 | Decorate(rescaling_textures_type, spv::Decoration::ArrayStride, 4u); | ||
| 1021 | members.push_back(rescaling_textures_type); | ||
| 1022 | rescaling_textures_member_index = member_index++; | ||
| 1023 | |||
| 1024 | rescaling_images_type = TypeArray(U32[1], Const(NUM_IMAGE_SCALING_WORDS)); | ||
| 1025 | Decorate(rescaling_images_type, spv::Decoration::ArrayStride, 4u); | ||
| 1026 | members.push_back(rescaling_images_type); | ||
| 1027 | rescaling_images_member_index = member_index++; | ||
| 1028 | |||
| 1029 | if (stage != Stage::Compute) { | ||
| 1030 | members.push_back(F32[1]); | ||
| 1031 | rescaling_downfactor_member_index = member_index++; | ||
| 1032 | } | ||
| 1033 | const Id push_constant_struct{TypeStruct(std::span(members.data(), members.size()))}; | ||
| 1034 | Decorate(push_constant_struct, spv::Decoration::Block); | ||
| 1035 | Name(push_constant_struct, "ResolutionInfo"); | ||
| 1036 | |||
| 1037 | MemberDecorate(push_constant_struct, rescaling_textures_member_index, spv::Decoration::Offset, | ||
| 1038 | static_cast<u32>(offsetof(RescalingLayout, rescaling_textures))); | ||
| 1039 | MemberName(push_constant_struct, rescaling_textures_member_index, "rescaling_textures"); | ||
| 1040 | |||
| 1041 | MemberDecorate(push_constant_struct, rescaling_images_member_index, spv::Decoration::Offset, | ||
| 1042 | static_cast<u32>(offsetof(RescalingLayout, rescaling_images))); | ||
| 1043 | MemberName(push_constant_struct, rescaling_images_member_index, "rescaling_images"); | ||
| 1044 | |||
| 1045 | if (stage != Stage::Compute) { | ||
| 1046 | MemberDecorate(push_constant_struct, rescaling_downfactor_member_index, | ||
| 1047 | spv::Decoration::Offset, | ||
| 1048 | static_cast<u32>(offsetof(RescalingLayout, down_factor))); | ||
| 1049 | MemberName(push_constant_struct, rescaling_downfactor_member_index, "down_factor"); | ||
| 1050 | } | ||
| 1051 | const Id pointer_type{TypePointer(spv::StorageClass::PushConstant, push_constant_struct)}; | ||
| 1052 | rescaling_push_constants = AddGlobalVariable(pointer_type, spv::StorageClass::PushConstant); | ||
| 1053 | Name(rescaling_push_constants, "rescaling_push_constants"); | ||
| 1054 | |||
| 1055 | if (profile.supported_spirv >= 0x00010400) { | ||
| 1056 | interfaces.push_back(rescaling_push_constants); | ||
| 1057 | } | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | void EmitContext::DefineRescalingInputUniformConstant() { | ||
| 1061 | const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, F32[4])}; | ||
| 1062 | rescaling_uniform_constant = | ||
| 1063 | AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant); | ||
| 1064 | Decorate(rescaling_uniform_constant, spv::Decoration::Location, 0u); | ||
| 1065 | |||
| 1066 | if (profile.supported_spirv >= 0x00010400) { | ||
| 1067 | interfaces.push_back(rescaling_uniform_constant); | ||
| 1068 | } | ||
| 1069 | } | ||
| 1070 | |||
| 999 | void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { | 1071 | void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { |
| 1000 | if (info.constant_buffer_descriptors.empty()) { | 1072 | if (info.constant_buffer_descriptors.empty()) { |
| 1001 | return; | 1073 | return; |
| @@ -1184,7 +1256,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { | |||
| 1184 | } | 1256 | } |
| 1185 | } | 1257 | } |
| 1186 | 1258 | ||
| 1187 | void EmitContext::DefineTextures(const Info& info, u32& binding) { | 1259 | void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_index) { |
| 1188 | textures.reserve(info.texture_descriptors.size()); | 1260 | textures.reserve(info.texture_descriptors.size()); |
| 1189 | for (const TextureDescriptor& desc : info.texture_descriptors) { | 1261 | for (const TextureDescriptor& desc : info.texture_descriptors) { |
| 1190 | const Id image_type{ImageType(*this, desc)}; | 1262 | const Id image_type{ImageType(*this, desc)}; |
| @@ -1206,13 +1278,14 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { | |||
| 1206 | interfaces.push_back(id); | 1278 | interfaces.push_back(id); |
| 1207 | } | 1279 | } |
| 1208 | ++binding; | 1280 | ++binding; |
| 1281 | ++scaling_index; | ||
| 1209 | } | 1282 | } |
| 1210 | if (info.uses_atomic_image_u32) { | 1283 | if (info.uses_atomic_image_u32) { |
| 1211 | image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); | 1284 | image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); |
| 1212 | } | 1285 | } |
| 1213 | } | 1286 | } |
| 1214 | 1287 | ||
| 1215 | void EmitContext::DefineImages(const Info& info, u32& binding) { | 1288 | void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_index) { |
| 1216 | images.reserve(info.image_descriptors.size()); | 1289 | images.reserve(info.image_descriptors.size()); |
| 1217 | for (const ImageDescriptor& desc : info.image_descriptors) { | 1290 | for (const ImageDescriptor& desc : info.image_descriptors) { |
| 1218 | if (desc.count != 1) { | 1291 | if (desc.count != 1) { |
| @@ -1233,6 +1306,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { | |||
| 1233 | interfaces.push_back(id); | 1306 | interfaces.push_back(id); |
| 1234 | } | 1307 | } |
| 1235 | ++binding; | 1308 | ++binding; |
| 1309 | ++scaling_index; | ||
| 1236 | } | 1310 | } |
| 1237 | } | 1311 | } |
| 1238 | 1312 | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 112c52382..63f8185d9 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h | |||
| @@ -238,6 +238,16 @@ public: | |||
| 238 | Id indexed_load_func{}; | 238 | Id indexed_load_func{}; |
| 239 | Id indexed_store_func{}; | 239 | Id indexed_store_func{}; |
| 240 | 240 | ||
| 241 | Id rescaling_uniform_constant{}; | ||
| 242 | Id rescaling_push_constants{}; | ||
| 243 | Id rescaling_textures_type{}; | ||
| 244 | Id rescaling_images_type{}; | ||
| 245 | u32 rescaling_textures_member_index{}; | ||
| 246 | u32 rescaling_images_member_index{}; | ||
| 247 | u32 rescaling_downfactor_member_index{}; | ||
| 248 | u32 texture_rescaling_index{}; | ||
| 249 | u32 image_rescaling_index{}; | ||
| 250 | |||
| 241 | Id local_memory{}; | 251 | Id local_memory{}; |
| 242 | 252 | ||
| 243 | Id shared_memory_u8{}; | 253 | Id shared_memory_u8{}; |
| @@ -310,10 +320,13 @@ private: | |||
| 310 | void DefineStorageBuffers(const Info& info, u32& binding); | 320 | void DefineStorageBuffers(const Info& info, u32& binding); |
| 311 | void DefineTextureBuffers(const Info& info, u32& binding); | 321 | void DefineTextureBuffers(const Info& info, u32& binding); |
| 312 | void DefineImageBuffers(const Info& info, u32& binding); | 322 | void DefineImageBuffers(const Info& info, u32& binding); |
| 313 | void DefineTextures(const Info& info, u32& binding); | 323 | void DefineTextures(const Info& info, u32& binding, u32& scaling_index); |
| 314 | void DefineImages(const Info& info, u32& binding); | 324 | void DefineImages(const Info& info, u32& binding, u32& scaling_index); |
| 315 | void DefineAttributeMemAccess(const Info& info); | 325 | void DefineAttributeMemAccess(const Info& info); |
| 316 | void DefineGlobalMemoryFunctions(const Info& info); | 326 | void DefineGlobalMemoryFunctions(const Info& info); |
| 327 | void DefineRescalingInput(const Info& info); | ||
| 328 | void DefineRescalingInputPushConstant(); | ||
| 329 | void DefineRescalingInputUniformConstant(); | ||
| 317 | 330 | ||
| 318 | void DefineInputs(const IR::Program& program); | 331 | void DefineInputs(const IR::Program& program); |
| 319 | void DefineOutputs(const IR::Program& program); | 332 | void DefineOutputs(const IR::Program& program); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index db0c935fe..4b25534ce 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -16,6 +16,19 @@ | |||
| 16 | 16 | ||
| 17 | namespace Shader::Backend::SPIRV { | 17 | namespace Shader::Backend::SPIRV { |
| 18 | 18 | ||
| 19 | constexpr u32 NUM_TEXTURE_SCALING_WORDS = 4; | ||
| 20 | constexpr u32 NUM_IMAGE_SCALING_WORDS = 2; | ||
| 21 | constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS = | ||
| 22 | NUM_TEXTURE_SCALING_WORDS + NUM_IMAGE_SCALING_WORDS; | ||
| 23 | |||
| 24 | struct RescalingLayout { | ||
| 25 | alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures; | ||
| 26 | alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images; | ||
| 27 | alignas(16) u32 down_factor; | ||
| 28 | }; | ||
| 29 | constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures); | ||
| 30 | constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor); | ||
| 31 | |||
| 19 | [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, | 32 | [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, |
| 20 | IR::Program& program, Bindings& bindings); | 33 | IR::Program& program, Bindings& bindings); |
| 21 | 34 | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d3a93d5f4..bac683ae1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -526,6 +526,18 @@ Id EmitYDirection(EmitContext& ctx) { | |||
| 526 | return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f); | 526 | return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f); |
| 527 | } | 527 | } |
| 528 | 528 | ||
| 529 | Id EmitResolutionDownFactor(EmitContext& ctx) { | ||
| 530 | if (ctx.profile.unified_descriptor_binding) { | ||
| 531 | const Id pointer_type{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.F32[1])}; | ||
| 532 | const Id index{ctx.Const(ctx.rescaling_downfactor_member_index)}; | ||
| 533 | const Id pointer{ctx.OpAccessChain(pointer_type, ctx.rescaling_push_constants, index)}; | ||
| 534 | return ctx.OpLoad(ctx.F32[1], pointer); | ||
| 535 | } else { | ||
| 536 | const Id composite{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)}; | ||
| 537 | return ctx.OpCompositeExtract(ctx.F32[1], composite, 2u); | ||
| 538 | } | ||
| 539 | } | ||
| 540 | |||
| 529 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { | 541 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { |
| 530 | const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; | 542 | const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; |
| 531 | return ctx.OpLoad(ctx.U32[1], pointer); | 543 | return ctx.OpLoad(ctx.U32[1], pointer); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 1d5364309..4d168a96d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -224,6 +224,36 @@ Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx | |||
| 224 | Decorate(ctx, inst, sample); | 224 | Decorate(ctx, inst, sample); |
| 225 | return ctx.OpCompositeExtract(result_type, sample, 1U); | 225 | return ctx.OpCompositeExtract(result_type, sample, 1U); |
| 226 | } | 226 | } |
| 227 | |||
| 228 | Id IsScaled(EmitContext& ctx, const IR::Value& index, Id member_index, u32 base_index) { | ||
| 229 | const Id push_constant_u32{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1])}; | ||
| 230 | Id bit{}; | ||
| 231 | if (index.IsImmediate()) { | ||
| 232 | // Use BitwiseAnd instead of BitfieldExtract for better codegen on Nvidia OpenGL. | ||
| 233 | // LOP32I.NZ is used to set the predicate rather than BFE+ISETP. | ||
| 234 | const u32 index_value{index.U32() + base_index}; | ||
| 235 | const Id word_index{ctx.Const(index_value / 32)}; | ||
| 236 | const Id bit_index_mask{ctx.Const(1u << (index_value % 32))}; | ||
| 237 | const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants, | ||
| 238 | member_index, word_index)}; | ||
| 239 | const Id word{ctx.OpLoad(ctx.U32[1], pointer)}; | ||
| 240 | bit = ctx.OpBitwiseAnd(ctx.U32[1], word, bit_index_mask); | ||
| 241 | } else { | ||
| 242 | Id index_value{ctx.Def(index)}; | ||
| 243 | if (base_index != 0) { | ||
| 244 | index_value = ctx.OpIAdd(ctx.U32[1], index_value, ctx.Const(base_index)); | ||
| 245 | } | ||
| 246 | const Id bit_index{ctx.OpBitwiseAnd(ctx.U32[1], index_value, ctx.Const(31u))}; | ||
| 247 | bit = ctx.OpBitFieldUExtract(ctx.U32[1], index_value, bit_index, ctx.Const(1u)); | ||
| 248 | } | ||
| 249 | return ctx.OpINotEqual(ctx.U1, bit, ctx.u32_zero_value); | ||
| 250 | } | ||
| 251 | |||
| 252 | Id BitTest(EmitContext& ctx, Id mask, Id bit) { | ||
| 253 | const Id shifted{ctx.OpShiftRightLogical(ctx.U32[1], mask, bit)}; | ||
| 254 | const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; | ||
| 255 | return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); | ||
| 256 | } | ||
| 227 | } // Anonymous namespace | 257 | } // Anonymous namespace |
| 228 | 258 | ||
| 229 | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { | 259 | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { |
| @@ -470,4 +500,28 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id | |||
| 470 | ctx.OpImageWrite(Image(ctx, index, info), coords, color); | 500 | ctx.OpImageWrite(Image(ctx, index, info), coords, color); |
| 471 | } | 501 | } |
| 472 | 502 | ||
| 503 | Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index) { | ||
| 504 | if (ctx.profile.unified_descriptor_binding) { | ||
| 505 | const Id member_index{ctx.Const(ctx.rescaling_textures_member_index)}; | ||
| 506 | return IsScaled(ctx, index, member_index, ctx.texture_rescaling_index); | ||
| 507 | } else { | ||
| 508 | const Id composite{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)}; | ||
| 509 | const Id mask_f32{ctx.OpCompositeExtract(ctx.F32[1], composite, 0u)}; | ||
| 510 | const Id mask{ctx.OpBitcast(ctx.U32[1], mask_f32)}; | ||
| 511 | return BitTest(ctx, mask, ctx.Def(index)); | ||
| 512 | } | ||
| 513 | } | ||
| 514 | |||
| 515 | Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index) { | ||
| 516 | if (ctx.profile.unified_descriptor_binding) { | ||
| 517 | const Id member_index{ctx.Const(ctx.rescaling_images_member_index)}; | ||
| 518 | return IsScaled(ctx, index, member_index, ctx.image_rescaling_index); | ||
| 519 | } else { | ||
| 520 | const Id composite{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)}; | ||
| 521 | const Id mask_f32{ctx.OpCompositeExtract(ctx.F32[1], composite, 1u)}; | ||
| 522 | const Id mask{ctx.OpBitcast(ctx.U32[1], mask_f32)}; | ||
| 523 | return BitTest(ctx, mask, ctx.Def(index)); | ||
| 524 | } | ||
| 525 | } | ||
| 526 | |||
| 473 | } // namespace Shader::Backend::SPIRV | 527 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index c9db1c164..6cd22dd3e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | |||
| @@ -75,6 +75,7 @@ Id EmitInvocationId(EmitContext& ctx); | |||
| 75 | Id EmitSampleId(EmitContext& ctx); | 75 | Id EmitSampleId(EmitContext& ctx); |
| 76 | Id EmitIsHelperInvocation(EmitContext& ctx); | 76 | Id EmitIsHelperInvocation(EmitContext& ctx); |
| 77 | Id EmitYDirection(EmitContext& ctx); | 77 | Id EmitYDirection(EmitContext& ctx); |
| 78 | Id EmitResolutionDownFactor(EmitContext& ctx); | ||
| 78 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset); | 79 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset); |
| 79 | void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); | 80 | void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); |
| 80 | Id EmitUndefU1(EmitContext& ctx); | 81 | Id EmitUndefU1(EmitContext& ctx); |
| @@ -283,6 +284,8 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b); | |||
| 283 | Id EmitISub32(EmitContext& ctx, Id a, Id b); | 284 | Id EmitISub32(EmitContext& ctx, Id a, Id b); |
| 284 | Id EmitISub64(EmitContext& ctx, Id a, Id b); | 285 | Id EmitISub64(EmitContext& ctx, Id a, Id b); |
| 285 | Id EmitIMul32(EmitContext& ctx, Id a, Id b); | 286 | Id EmitIMul32(EmitContext& ctx, Id a, Id b); |
| 287 | Id EmitSDiv32(EmitContext& ctx, Id a, Id b); | ||
| 288 | Id EmitUDiv32(EmitContext& ctx, Id a, Id b); | ||
| 286 | Id EmitINeg32(EmitContext& ctx, Id value); | 289 | Id EmitINeg32(EmitContext& ctx, Id value); |
| 287 | Id EmitINeg64(EmitContext& ctx, Id value); | 290 | Id EmitINeg64(EmitContext& ctx, Id value); |
| 288 | Id EmitIAbs32(EmitContext& ctx, Id value); | 291 | Id EmitIAbs32(EmitContext& ctx, Id value); |
| @@ -510,6 +513,8 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I | |||
| 510 | Id derivates, Id offset, Id lod_clamp); | 513 | Id derivates, Id offset, Id lod_clamp); |
| 511 | Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); | 514 | Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); |
| 512 | void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); | 515 | void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); |
| 516 | Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index); | ||
| 517 | Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index); | ||
| 513 | Id EmitBindlessImageAtomicIAdd32(EmitContext&); | 518 | Id EmitBindlessImageAtomicIAdd32(EmitContext&); |
| 514 | Id EmitBindlessImageAtomicSMin32(EmitContext&); | 519 | Id EmitBindlessImageAtomicSMin32(EmitContext&); |
| 515 | Id EmitBindlessImageAtomicUMin32(EmitContext&); | 520 | Id EmitBindlessImageAtomicUMin32(EmitContext&); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 3501d7495..50277eec3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp | |||
| @@ -72,6 +72,14 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) { | |||
| 72 | return ctx.OpIMul(ctx.U32[1], a, b); | 72 | return ctx.OpIMul(ctx.U32[1], a, b); |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | Id EmitSDiv32(EmitContext& ctx, Id a, Id b) { | ||
| 76 | return ctx.OpSDiv(ctx.U32[1], a, b); | ||
| 77 | } | ||
| 78 | |||
| 79 | Id EmitUDiv32(EmitContext& ctx, Id a, Id b) { | ||
| 80 | return ctx.OpUDiv(ctx.U32[1], a, b); | ||
| 81 | } | ||
| 82 | |||
| 75 | Id EmitINeg32(EmitContext& ctx, Id value) { | 83 | Id EmitINeg32(EmitContext& ctx, Id value) { |
| 76 | return ctx.OpSNegate(ctx.U32[1], value); | 84 | return ctx.OpSNegate(ctx.U32[1], value); |
| 77 | } | 85 | } |
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 7c08b25ce..974efa4a0 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp | |||
| @@ -22,6 +22,11 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { | |||
| 22 | PrependNewInst(end(), op, args); | 22 | PrependNewInst(end(), op, args); |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) { | ||
| 26 | Inst* const inst{inst_pool->Create(base_inst)}; | ||
| 27 | return instructions.insert(insertion_point, *inst); | ||
| 28 | } | ||
| 29 | |||
| 25 | Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, | 30 | Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, |
| 26 | std::initializer_list<Value> args, u32 flags) { | 31 | std::initializer_list<Value> args, u32 flags) { |
| 27 | Inst* const inst{inst_pool->Create(op, flags)}; | 32 | Inst* const inst{inst_pool->Create(op, flags)}; |
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 9ce1ed07e..fbfe98266 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h | |||
| @@ -40,6 +40,9 @@ public: | |||
| 40 | /// Appends a new instruction to the end of this basic block. | 40 | /// Appends a new instruction to the end of this basic block. |
| 41 | void AppendNewInst(Opcode op, std::initializer_list<Value> args); | 41 | void AppendNewInst(Opcode op, std::initializer_list<Value> args); |
| 42 | 42 | ||
| 43 | /// Prepends a copy of an instruction to this basic block before the insertion point. | ||
| 44 | iterator PrependNewInst(iterator insertion_point, const Inst& base_inst); | ||
| 45 | |||
| 43 | /// Prepends a new instruction to this basic block before the insertion point. | 46 | /// Prepends a new instruction to this basic block before the insertion point. |
| 44 | iterator PrependNewInst(iterator insertion_point, Opcode op, | 47 | iterator PrependNewInst(iterator insertion_point, Opcode op, |
| 45 | std::initializer_list<Value> args = {}, u32 flags = 0); | 48 | std::initializer_list<Value> args = {}, u32 flags = 0); |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 13159a68d..356f889ac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -375,6 +375,10 @@ F32 IREmitter::YDirection() { | |||
| 375 | return Inst<F32>(Opcode::YDirection); | 375 | return Inst<F32>(Opcode::YDirection); |
| 376 | } | 376 | } |
| 377 | 377 | ||
| 378 | F32 IREmitter::ResolutionDownFactor() { | ||
| 379 | return Inst<F32>(Opcode::ResolutionDownFactor); | ||
| 380 | } | ||
| 381 | |||
| 378 | U32 IREmitter::LaneId() { | 382 | U32 IREmitter::LaneId() { |
| 379 | return Inst<U32>(Opcode::LaneId); | 383 | return Inst<U32>(Opcode::LaneId); |
| 380 | } | 384 | } |
| @@ -1141,6 +1145,10 @@ U32 IREmitter::IMul(const U32& a, const U32& b) { | |||
| 1141 | return Inst<U32>(Opcode::IMul32, a, b); | 1145 | return Inst<U32>(Opcode::IMul32, a, b); |
| 1142 | } | 1146 | } |
| 1143 | 1147 | ||
| 1148 | U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) { | ||
| 1149 | return Inst<U32>(is_signed ? Opcode::SDiv32 : Opcode::UDiv32, a, b); | ||
| 1150 | } | ||
| 1151 | |||
| 1144 | U32U64 IREmitter::INeg(const U32U64& value) { | 1152 | U32U64 IREmitter::INeg(const U32U64& value) { |
| 1145 | switch (value.Type()) { | 1153 | switch (value.Type()) { |
| 1146 | case Type::U32: | 1154 | case Type::U32: |
| @@ -1938,6 +1946,14 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c | |||
| 1938 | return Inst(op, Flags{info}, handle, coords, value); | 1946 | return Inst(op, Flags{info}, handle, coords, value); |
| 1939 | } | 1947 | } |
| 1940 | 1948 | ||
| 1949 | U1 IREmitter::IsTextureScaled(const U32& index) { | ||
| 1950 | return Inst<U1>(Opcode::IsTextureScaled, index); | ||
| 1951 | } | ||
| 1952 | |||
| 1953 | U1 IREmitter::IsImageScaled(const U32& index) { | ||
| 1954 | return Inst<U1>(Opcode::IsImageScaled, index); | ||
| 1955 | } | ||
| 1956 | |||
| 1941 | U1 IREmitter::VoteAll(const U1& value) { | 1957 | U1 IREmitter::VoteAll(const U1& value) { |
| 1942 | return Inst<U1>(Opcode::VoteAll, value); | 1958 | return Inst<U1>(Opcode::VoteAll, value); |
| 1943 | } | 1959 | } |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1b89ca5a0..13eefa88b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -102,6 +102,8 @@ public: | |||
| 102 | [[nodiscard]] U1 IsHelperInvocation(); | 102 | [[nodiscard]] U1 IsHelperInvocation(); |
| 103 | [[nodiscard]] F32 YDirection(); | 103 | [[nodiscard]] F32 YDirection(); |
| 104 | 104 | ||
| 105 | [[nodiscard]] F32 ResolutionDownFactor(); | ||
| 106 | |||
| 105 | [[nodiscard]] U32 LaneId(); | 107 | [[nodiscard]] U32 LaneId(); |
| 106 | 108 | ||
| 107 | [[nodiscard]] U32 LoadGlobalU8(const U64& address); | 109 | [[nodiscard]] U32 LoadGlobalU8(const U64& address); |
| @@ -207,6 +209,7 @@ public: | |||
| 207 | [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); | 209 | [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); |
| 208 | [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); | 210 | [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); |
| 209 | [[nodiscard]] U32 IMul(const U32& a, const U32& b); | 211 | [[nodiscard]] U32 IMul(const U32& a, const U32& b); |
| 212 | [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false); | ||
| 210 | [[nodiscard]] U32U64 INeg(const U32U64& value); | 213 | [[nodiscard]] U32U64 INeg(const U32U64& value); |
| 211 | [[nodiscard]] U32 IAbs(const U32& value); | 214 | [[nodiscard]] U32 IAbs(const U32& value); |
| 212 | [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); | 215 | [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); |
| @@ -356,6 +359,10 @@ public: | |||
| 356 | TextureInstInfo info); | 359 | TextureInstInfo info); |
| 357 | [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, | 360 | [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, |
| 358 | const Value& value, TextureInstInfo info); | 361 | const Value& value, TextureInstInfo info); |
| 362 | |||
| 363 | [[nodiscard]] U1 IsTextureScaled(const U32& index); | ||
| 364 | [[nodiscard]] U1 IsImageScaled(const U32& index); | ||
| 365 | |||
| 359 | [[nodiscard]] U1 VoteAll(const U1& value); | 366 | [[nodiscard]] U1 VoteAll(const U1& value); |
| 360 | [[nodiscard]] U1 VoteAny(const U1& value); | 367 | [[nodiscard]] U1 VoteAny(const U1& value); |
| 361 | [[nodiscard]] U1 VoteEqual(const U1& value); | 368 | [[nodiscard]] U1 VoteEqual(const U1& value); |
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 30b470bdd..97e2bf6af 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp | |||
| @@ -47,6 +47,17 @@ Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { | |||
| 47 | } | 47 | } |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} { | ||
| 51 | if (base.op == Opcode::Phi) { | ||
| 52 | throw NotImplementedException("Copying phi node"); | ||
| 53 | } | ||
| 54 | std::construct_at(&args); | ||
| 55 | const size_t num_args{base.NumArgs()}; | ||
| 56 | for (size_t index = 0; index < num_args; ++index) { | ||
| 57 | SetArg(index, base.Arg(index)); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | |||
| 50 | Inst::~Inst() { | 61 | Inst::~Inst() { |
| 51 | if (op == Opcode::Phi) { | 62 | if (op == Opcode::Phi) { |
| 52 | std::destroy_at(&phi_args); | 63 | std::destroy_at(&phi_args); |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index d91098c80..6929919df 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -62,6 +62,7 @@ OPCODE(InvocationId, U32, | |||
| 62 | OPCODE(SampleId, U32, ) | 62 | OPCODE(SampleId, U32, ) |
| 63 | OPCODE(IsHelperInvocation, U1, ) | 63 | OPCODE(IsHelperInvocation, U1, ) |
| 64 | OPCODE(YDirection, F32, ) | 64 | OPCODE(YDirection, F32, ) |
| 65 | OPCODE(ResolutionDownFactor, F32, ) | ||
| 65 | 66 | ||
| 66 | // Undefined | 67 | // Undefined |
| 67 | OPCODE(UndefU1, U1, ) | 68 | OPCODE(UndefU1, U1, ) |
| @@ -286,6 +287,8 @@ OPCODE(IAdd64, U64, U64, | |||
| 286 | OPCODE(ISub32, U32, U32, U32, ) | 287 | OPCODE(ISub32, U32, U32, U32, ) |
| 287 | OPCODE(ISub64, U64, U64, U64, ) | 288 | OPCODE(ISub64, U64, U64, U64, ) |
| 288 | OPCODE(IMul32, U32, U32, U32, ) | 289 | OPCODE(IMul32, U32, U32, U32, ) |
| 290 | OPCODE(SDiv32, U32, U32, U32, ) | ||
| 291 | OPCODE(UDiv32, U32, U32, U32, ) | ||
| 289 | OPCODE(INeg32, U32, U32, ) | 292 | OPCODE(INeg32, U32, U32, ) |
| 290 | OPCODE(INeg64, U64, U64, ) | 293 | OPCODE(INeg64, U64, U64, ) |
| 291 | OPCODE(IAbs32, U32, U32, ) | 294 | OPCODE(IAbs32, U32, U32, ) |
| @@ -490,6 +493,9 @@ OPCODE(ImageGradient, F32x4, Opaq | |||
| 490 | OPCODE(ImageRead, U32x4, Opaque, Opaque, ) | 493 | OPCODE(ImageRead, U32x4, Opaque, Opaque, ) |
| 491 | OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) | 494 | OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) |
| 492 | 495 | ||
| 496 | OPCODE(IsTextureScaled, U1, U32, ) | ||
| 497 | OPCODE(IsImageScaled, U1, U32, ) | ||
| 498 | |||
| 493 | // Atomic Image operations | 499 | // Atomic Image operations |
| 494 | 500 | ||
| 495 | OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, ) | 501 | OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, ) |
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 6c9ef6bdd..947579852 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h | |||
| @@ -116,10 +116,10 @@ public: | |||
| 116 | class Inst : public boost::intrusive::list_base_hook<> { | 116 | class Inst : public boost::intrusive::list_base_hook<> { |
| 117 | public: | 117 | public: |
| 118 | explicit Inst(IR::Opcode op_, u32 flags_) noexcept; | 118 | explicit Inst(IR::Opcode op_, u32 flags_) noexcept; |
| 119 | explicit Inst(const Inst& base); | ||
| 119 | ~Inst(); | 120 | ~Inst(); |
| 120 | 121 | ||
| 121 | Inst& operator=(const Inst&) = delete; | 122 | Inst& operator=(const Inst&) = delete; |
| 122 | Inst(const Inst&) = delete; | ||
| 123 | 123 | ||
| 124 | Inst& operator=(Inst&&) = delete; | 124 | Inst& operator=(Inst&&) = delete; |
| 125 | Inst(Inst&&) = delete; | 125 | Inst(Inst&&) = delete; |
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 2fc542f0e..267ebe4af 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -179,6 +179,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 179 | Optimization::TexturePass(env, program); | 179 | Optimization::TexturePass(env, program); |
| 180 | 180 | ||
| 181 | Optimization::ConstantPropagationPass(program); | 181 | Optimization::ConstantPropagationPass(program); |
| 182 | |||
| 183 | if (Settings::values.resolution_info.active) { | ||
| 184 | Optimization::RescalingPass(program); | ||
| 185 | } | ||
| 182 | Optimization::DeadCodeEliminationPass(program); | 186 | Optimization::DeadCodeEliminationPass(program); |
| 183 | if (Settings::values.renderer_debug) { | 187 | if (Settings::values.renderer_debug) { |
| 184 | Optimization::VerificationPass(program); | 188 | Optimization::VerificationPass(program); |
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f69e1c9cc..1e476d83d 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -430,6 +430,11 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 430 | case IR::Opcode::IsHelperInvocation: | 430 | case IR::Opcode::IsHelperInvocation: |
| 431 | info.uses_is_helper_invocation = true; | 431 | info.uses_is_helper_invocation = true; |
| 432 | break; | 432 | break; |
| 433 | case IR::Opcode::ResolutionDownFactor: | ||
| 434 | case IR::Opcode::IsTextureScaled: | ||
| 435 | case IR::Opcode::IsImageScaled: | ||
| 436 | info.uses_rescaling_uniform = true; | ||
| 437 | break; | ||
| 433 | case IR::Opcode::LaneId: | 438 | case IR::Opcode::LaneId: |
| 434 | info.uses_subgroup_invocation_id = true; | 439 | info.uses_subgroup_invocation_id = true; |
| 435 | break; | 440 | break; |
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 2f89b1ea0..f877c7ba0 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -19,6 +19,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program); | |||
| 19 | void IdentityRemovalPass(IR::Program& program); | 19 | void IdentityRemovalPass(IR::Program& program); |
| 20 | void LowerFp16ToFp32(IR::Program& program); | 20 | void LowerFp16ToFp32(IR::Program& program); |
| 21 | void LowerInt64ToInt32(IR::Program& program); | 21 | void LowerInt64ToInt32(IR::Program& program); |
| 22 | void RescalingPass(IR::Program& program); | ||
| 22 | void SsaRewritePass(IR::Program& program); | 23 | void SsaRewritePass(IR::Program& program); |
| 23 | void TexturePass(Environment& env, IR::Program& program); | 24 | void TexturePass(Environment& env, IR::Program& program); |
| 24 | void VerificationPass(const IR::Program& program); | 25 | void VerificationPass(const IR::Program& program); |
diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp new file mode 100644 index 000000000..c28500dd1 --- /dev/null +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp | |||
| @@ -0,0 +1,327 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/alignment.h" | ||
| 6 | #include "common/settings.h" | ||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 12 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 13 | #include "shader_recompiler/shader_info.h" | ||
| 14 | |||
| 15 | namespace Shader::Optimization { | ||
| 16 | namespace { | ||
| 17 | [[nodiscard]] bool IsTextureTypeRescalable(TextureType type) { | ||
| 18 | switch (type) { | ||
| 19 | case TextureType::Color2D: | ||
| 20 | case TextureType::ColorArray2D: | ||
| 21 | return true; | ||
| 22 | case TextureType::Color1D: | ||
| 23 | case TextureType::ColorArray1D: | ||
| 24 | case TextureType::Color3D: | ||
| 25 | case TextureType::ColorCube: | ||
| 26 | case TextureType::ColorArrayCube: | ||
| 27 | case TextureType::Buffer: | ||
| 28 | break; | ||
| 29 | } | ||
| 30 | return false; | ||
| 31 | } | ||
| 32 | |||
| 33 | void VisitMark(IR::Block& block, IR::Inst& inst) { | ||
| 34 | switch (inst.GetOpcode()) { | ||
| 35 | case IR::Opcode::ShuffleIndex: | ||
| 36 | case IR::Opcode::ShuffleUp: | ||
| 37 | case IR::Opcode::ShuffleDown: | ||
| 38 | case IR::Opcode::ShuffleButterfly: { | ||
| 39 | const IR::Value shfl_arg{inst.Arg(0)}; | ||
| 40 | if (shfl_arg.IsImmediate()) { | ||
| 41 | break; | ||
| 42 | } | ||
| 43 | const IR::Inst* const arg_inst{shfl_arg.InstRecursive()}; | ||
| 44 | if (arg_inst->GetOpcode() != IR::Opcode::BitCastU32F32) { | ||
| 45 | break; | ||
| 46 | } | ||
| 47 | const IR::Value bitcast_arg{arg_inst->Arg(0)}; | ||
| 48 | if (bitcast_arg.IsImmediate()) { | ||
| 49 | break; | ||
| 50 | } | ||
| 51 | IR::Inst* const bitcast_inst{bitcast_arg.InstRecursive()}; | ||
| 52 | bool must_patch_outside = false; | ||
| 53 | if (bitcast_inst->GetOpcode() == IR::Opcode::GetAttribute) { | ||
| 54 | const IR::Attribute attr{bitcast_inst->Arg(0).Attribute()}; | ||
| 55 | switch (attr) { | ||
| 56 | case IR::Attribute::PositionX: | ||
| 57 | case IR::Attribute::PositionY: | ||
| 58 | bitcast_inst->SetFlags<u32>(0xDEADBEEF); | ||
| 59 | must_patch_outside = true; | ||
| 60 | break; | ||
| 61 | default: | ||
| 62 | break; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | if (must_patch_outside) { | ||
| 66 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 67 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 68 | const IR::F32 new_inst{&*block.PrependNewInst(it, inst)}; | ||
| 69 | const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())}; | ||
| 70 | const IR::Value converted{ir.FPMul(new_inst, up_factor)}; | ||
| 71 | inst.ReplaceUsesWith(converted); | ||
| 72 | } | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | |||
| 76 | default: | ||
| 77 | break; | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | void PatchFragCoord(IR::Block& block, IR::Inst& inst) { | ||
| 82 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 83 | const IR::F32 down_factor{ir.ResolutionDownFactor()}; | ||
| 84 | const IR::F32 frag_coord{ir.GetAttribute(inst.Arg(0).Attribute())}; | ||
| 85 | const IR::F32 downscaled_frag_coord{ir.FPMul(frag_coord, down_factor)}; | ||
| 86 | inst.ReplaceUsesWith(downscaled_frag_coord); | ||
| 87 | } | ||
| 88 | |||
| 89 | void PatchPointSize(IR::Block& block, IR::Inst& inst) { | ||
| 90 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 91 | const IR::F32 point_value{inst.Arg(1)}; | ||
| 92 | const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())}; | ||
| 93 | const IR::F32 upscaled_point_value{ir.FPMul(point_value, up_factor)}; | ||
| 94 | inst.SetArg(1, upscaled_point_value); | ||
| 95 | } | ||
| 96 | |||
| 97 | [[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) { | ||
| 98 | IR::U32 scaled_value{value}; | ||
| 99 | if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { | ||
| 100 | scaled_value = ir.IMul(scaled_value, ir.Imm32(up_scale)); | ||
| 101 | } | ||
| 102 | if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) { | ||
| 103 | scaled_value = ir.ShiftRightArithmetic(scaled_value, ir.Imm32(down_shift)); | ||
| 104 | } | ||
| 105 | return IR::U32{ir.Select(is_scaled, scaled_value, value)}; | ||
| 106 | } | ||
| 107 | |||
| 108 | [[nodiscard]] IR::U32 SubScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value, | ||
| 109 | const IR::Attribute attrib) { | ||
| 110 | const IR::F32 up_factor{ir.Imm32(Settings::values.resolution_info.up_factor)}; | ||
| 111 | const IR::F32 base{ir.FPMul(ir.ConvertUToF(32, 32, value), up_factor)}; | ||
| 112 | const IR::F32 frag_coord{ir.GetAttribute(attrib)}; | ||
| 113 | const IR::F32 down_factor{ir.Imm32(Settings::values.resolution_info.down_factor)}; | ||
| 114 | const IR::F32 floor{ir.FPMul(up_factor, ir.FPFloor(ir.FPMul(frag_coord, down_factor)))}; | ||
| 115 | const IR::F16F32F64 deviation{ir.FPAdd(base, ir.FPAdd(frag_coord, ir.FPNeg(floor)))}; | ||
| 116 | return IR::U32{ir.Select(is_scaled, ir.ConvertFToU(32, deviation), value)}; | ||
| 117 | } | ||
| 118 | |||
| 119 | [[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) { | ||
| 120 | IR::U32 scaled_value{value}; | ||
| 121 | if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) { | ||
| 122 | scaled_value = ir.ShiftLeftLogical(scaled_value, ir.Imm32(down_shift)); | ||
| 123 | } | ||
| 124 | if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { | ||
| 125 | scaled_value = ir.IDiv(scaled_value, ir.Imm32(up_scale)); | ||
| 126 | } | ||
| 127 | return IR::U32{ir.Select(is_scaled, scaled_value, value)}; | ||
| 128 | } | ||
| 129 | |||
| 130 | void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) { | ||
| 131 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 132 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 133 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 134 | const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; | ||
| 135 | switch (info.type) { | ||
| 136 | case TextureType::Color2D: | ||
| 137 | case TextureType::ColorArray2D: { | ||
| 138 | const IR::Value new_inst{&*block.PrependNewInst(it, inst)}; | ||
| 139 | const IR::U32 width{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 0)})}; | ||
| 140 | const IR::U32 height{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 1)})}; | ||
| 141 | const IR::Value replacement{ir.CompositeConstruct( | ||
| 142 | width, height, ir.CompositeExtract(new_inst, 2), ir.CompositeExtract(new_inst, 3))}; | ||
| 143 | inst.ReplaceUsesWith(replacement); | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | case TextureType::Color1D: | ||
| 147 | case TextureType::ColorArray1D: | ||
| 148 | case TextureType::Color3D: | ||
| 149 | case TextureType::ColorCube: | ||
| 150 | case TextureType::ColorArrayCube: | ||
| 151 | case TextureType::Buffer: | ||
| 152 | // Nothing to patch here | ||
| 153 | break; | ||
| 154 | } | ||
| 155 | } | ||
| 156 | |||
| 157 | void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled, | ||
| 158 | size_t index) { | ||
| 159 | const IR::Value composite{inst.Arg(index)}; | ||
| 160 | if (composite.IsEmpty()) { | ||
| 161 | return; | ||
| 162 | } | ||
| 163 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 164 | const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})}; | ||
| 165 | const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})}; | ||
| 166 | switch (info.type) { | ||
| 167 | case TextureType::Color2D: | ||
| 168 | inst.SetArg(index, ir.CompositeConstruct(x, y)); | ||
| 169 | break; | ||
| 170 | case TextureType::ColorArray2D: { | ||
| 171 | const IR::U32 z{ir.CompositeExtract(composite, 2)}; | ||
| 172 | inst.SetArg(index, ir.CompositeConstruct(x, y, z)); | ||
| 173 | break; | ||
| 174 | } | ||
| 175 | case TextureType::Color1D: | ||
| 176 | case TextureType::ColorArray1D: | ||
| 177 | case TextureType::Color3D: | ||
| 178 | case TextureType::ColorCube: | ||
| 179 | case TextureType::ColorArrayCube: | ||
| 180 | case TextureType::Buffer: | ||
| 181 | // Nothing to patch here | ||
| 182 | break; | ||
| 183 | } | ||
| 184 | } | ||
| 185 | |||
| 186 | void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { | ||
| 187 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 188 | const IR::Value coord{inst.Arg(1)}; | ||
| 189 | const IR::U32 coord_x{ir.CompositeExtract(coord, 0)}; | ||
| 190 | const IR::U32 coord_y{ir.CompositeExtract(coord, 1)}; | ||
| 191 | |||
| 192 | const IR::U32 scaled_x{SubScale(ir, is_scaled, coord_x, IR::Attribute::PositionX)}; | ||
| 193 | const IR::U32 scaled_y{SubScale(ir, is_scaled, coord_y, IR::Attribute::PositionY)}; | ||
| 194 | switch (info.type) { | ||
| 195 | case TextureType::Color2D: | ||
| 196 | inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y)); | ||
| 197 | break; | ||
| 198 | case TextureType::ColorArray2D: { | ||
| 199 | const IR::U32 z{ir.CompositeExtract(coord, 2)}; | ||
| 200 | inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y, z)); | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | case TextureType::Color1D: | ||
| 204 | case TextureType::ColorArray1D: | ||
| 205 | case TextureType::Color3D: | ||
| 206 | case TextureType::ColorCube: | ||
| 207 | case TextureType::ColorArrayCube: | ||
| 208 | case TextureType::Buffer: | ||
| 209 | // Nothing to patch here | ||
| 210 | break; | ||
| 211 | } | ||
| 212 | } | ||
| 213 | |||
| 214 | void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { | ||
| 215 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 216 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 217 | if (!IsTextureTypeRescalable(info.type)) { | ||
| 218 | return; | ||
| 219 | } | ||
| 220 | const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; | ||
| 221 | SubScaleCoord(ir, inst, is_scaled); | ||
| 222 | // Scale ImageFetch offset | ||
| 223 | ScaleIntegerComposite(ir, inst, is_scaled, 2); | ||
| 224 | } | ||
| 225 | |||
| 226 | void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { | ||
| 227 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 228 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 229 | if (!IsTextureTypeRescalable(info.type)) { | ||
| 230 | return; | ||
| 231 | } | ||
| 232 | const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))}; | ||
| 233 | SubScaleCoord(ir, inst, is_scaled); | ||
| 234 | } | ||
| 235 | |||
| 236 | void PatchImageFetch(IR::Block& block, IR::Inst& inst) { | ||
| 237 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 238 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 239 | if (!IsTextureTypeRescalable(info.type)) { | ||
| 240 | return; | ||
| 241 | } | ||
| 242 | const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; | ||
| 243 | ScaleIntegerComposite(ir, inst, is_scaled, 1); | ||
| 244 | // Scale ImageFetch offset | ||
| 245 | ScaleIntegerComposite(ir, inst, is_scaled, 2); | ||
| 246 | } | ||
| 247 | |||
| 248 | void PatchImageRead(IR::Block& block, IR::Inst& inst) { | ||
| 249 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 250 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 251 | if (!IsTextureTypeRescalable(info.type)) { | ||
| 252 | return; | ||
| 253 | } | ||
| 254 | const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))}; | ||
| 255 | ScaleIntegerComposite(ir, inst, is_scaled, 1); | ||
| 256 | } | ||
| 257 | |||
| 258 | void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { | ||
| 259 | const bool is_fragment_shader{program.stage == Stage::Fragment}; | ||
| 260 | switch (inst.GetOpcode()) { | ||
| 261 | case IR::Opcode::GetAttribute: { | ||
| 262 | const IR::Attribute attr{inst.Arg(0).Attribute()}; | ||
| 263 | switch (attr) { | ||
| 264 | case IR::Attribute::PositionX: | ||
| 265 | case IR::Attribute::PositionY: | ||
| 266 | if (is_fragment_shader && inst.Flags<u32>() != 0xDEADBEEF) { | ||
| 267 | PatchFragCoord(block, inst); | ||
| 268 | } | ||
| 269 | break; | ||
| 270 | default: | ||
| 271 | break; | ||
| 272 | } | ||
| 273 | break; | ||
| 274 | } | ||
| 275 | case IR::Opcode::SetAttribute: { | ||
| 276 | const IR::Attribute attr{inst.Arg(0).Attribute()}; | ||
| 277 | switch (attr) { | ||
| 278 | case IR::Attribute::PointSize: | ||
| 279 | if (inst.Flags<u32>() != 0xDEADBEEF) { | ||
| 280 | PatchPointSize(block, inst); | ||
| 281 | } | ||
| 282 | break; | ||
| 283 | default: | ||
| 284 | break; | ||
| 285 | } | ||
| 286 | break; | ||
| 287 | } | ||
| 288 | case IR::Opcode::ImageQueryDimensions: | ||
| 289 | PatchImageQueryDimensions(block, inst); | ||
| 290 | break; | ||
| 291 | case IR::Opcode::ImageFetch: | ||
| 292 | if (is_fragment_shader) { | ||
| 293 | SubScaleImageFetch(block, inst); | ||
| 294 | } else { | ||
| 295 | PatchImageFetch(block, inst); | ||
| 296 | } | ||
| 297 | break; | ||
| 298 | case IR::Opcode::ImageRead: | ||
| 299 | if (is_fragment_shader) { | ||
| 300 | SubScaleImageRead(block, inst); | ||
| 301 | } else { | ||
| 302 | PatchImageRead(block, inst); | ||
| 303 | } | ||
| 304 | break; | ||
| 305 | default: | ||
| 306 | break; | ||
| 307 | } | ||
| 308 | } | ||
| 309 | } // Anonymous namespace | ||
| 310 | |||
| 311 | void RescalingPass(IR::Program& program) { | ||
| 312 | const bool is_fragment_shader{program.stage == Stage::Fragment}; | ||
| 313 | if (is_fragment_shader) { | ||
| 314 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 315 | for (IR::Inst& inst : block->Instructions()) { | ||
| 316 | VisitMark(*block, inst); | ||
| 317 | } | ||
| 318 | } | ||
| 319 | } | ||
| 320 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 321 | for (IR::Inst& inst : block->Instructions()) { | ||
| 322 | Visit(program, *block, inst); | ||
| 323 | } | ||
| 324 | } | ||
| 325 | } | ||
| 326 | |||
| 327 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 4ef4dbd40..9f375c30e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -172,6 +172,7 @@ struct Info { | |||
| 172 | bool uses_global_memory{}; | 172 | bool uses_global_memory{}; |
| 173 | bool uses_atomic_image_u32{}; | 173 | bool uses_atomic_image_u32{}; |
| 174 | bool uses_shadow_lod{}; | 174 | bool uses_shadow_lod{}; |
| 175 | bool uses_rescaling_uniform{}; | ||
| 175 | 176 | ||
| 176 | IR::Type used_constant_buffer_types{}; | 177 | IR::Type used_constant_buffer_types{}; |
| 177 | IR::Type used_storage_buffer_types{}; | 178 | IR::Type used_storage_buffer_types{}; |
| @@ -190,4 +191,13 @@ struct Info { | |||
| 190 | ImageDescriptors image_descriptors; | 191 | ImageDescriptors image_descriptors; |
| 191 | }; | 192 | }; |
| 192 | 193 | ||
| 194 | template <typename Descriptors> | ||
| 195 | u32 NumDescriptors(const Descriptors& descriptors) { | ||
| 196 | u32 num{}; | ||
| 197 | for (const auto& desc : descriptors) { | ||
| 198 | num += desc.count; | ||
| 199 | } | ||
| 200 | return num; | ||
| 201 | } | ||
| 202 | |||
| 193 | } // namespace Shader | 203 | } // namespace Shader |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6aac7f305..91a30fef7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -132,6 +132,8 @@ add_library(video_core STATIC | |||
| 132 | renderer_vulkan/vk_descriptor_pool.h | 132 | renderer_vulkan/vk_descriptor_pool.h |
| 133 | renderer_vulkan/vk_fence_manager.cpp | 133 | renderer_vulkan/vk_fence_manager.cpp |
| 134 | renderer_vulkan/vk_fence_manager.h | 134 | renderer_vulkan/vk_fence_manager.h |
| 135 | renderer_vulkan/vk_fsr.cpp | ||
| 136 | renderer_vulkan/vk_fsr.h | ||
| 135 | renderer_vulkan/vk_graphics_pipeline.cpp | 137 | renderer_vulkan/vk_graphics_pipeline.cpp |
| 136 | renderer_vulkan/vk_graphics_pipeline.h | 138 | renderer_vulkan/vk_graphics_pipeline.h |
| 137 | renderer_vulkan/vk_master_semaphore.cpp | 139 | renderer_vulkan/vk_master_semaphore.cpp |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d350c9b36..43bed63ac 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -853,12 +853,14 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 853 | } | 853 | } |
| 854 | if constexpr (USE_MEMORY_MAPS) { | 854 | if constexpr (USE_MEMORY_MAPS) { |
| 855 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | 855 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); |
| 856 | runtime.PreCopyBarrier(); | ||
| 856 | for (auto& [copy, buffer_id] : downloads) { | 857 | for (auto& [copy, buffer_id] : downloads) { |
| 857 | // Have in mind the staging buffer offset for the copy | 858 | // Have in mind the staging buffer offset for the copy |
| 858 | copy.dst_offset += download_staging.offset; | 859 | copy.dst_offset += download_staging.offset; |
| 859 | const std::array copies{copy}; | 860 | const std::array copies{copy}; |
| 860 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); | 861 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); |
| 861 | } | 862 | } |
| 863 | runtime.PostCopyBarrier(); | ||
| 862 | runtime.Finish(); | 864 | runtime.Finish(); |
| 863 | for (const auto& [copy, buffer_id] : downloads) { | 865 | for (const auto& [copy, buffer_id] : downloads) { |
| 864 | const Buffer& buffer = slot_buffers[buffer_id]; | 866 | const Buffer& buffer = slot_buffers[buffer_id]; |
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index f0d545f90..d63ad5a35 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h | |||
| @@ -29,6 +29,8 @@ enum : u8 { | |||
| 29 | ColorBuffer6, | 29 | ColorBuffer6, |
| 30 | ColorBuffer7, | 30 | ColorBuffer7, |
| 31 | ZetaBuffer, | 31 | ZetaBuffer, |
| 32 | RescaleViewports, | ||
| 33 | RescaleScissors, | ||
| 32 | 34 | ||
| 33 | VertexBuffers, | 35 | VertexBuffers, |
| 34 | VertexBuffer0, | 36 | VertexBuffer0, |
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 20d748c12..d779a967a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -1,3 +1,11 @@ | |||
| 1 | set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr) | ||
| 2 | |||
| 3 | set(GLSL_INCLUDES | ||
| 4 | fidelityfx_fsr.comp | ||
| 5 | ${FIDELITYFX_INCLUDE_DIR}/ffx_a.h | ||
| 6 | ${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h | ||
| 7 | ) | ||
| 8 | |||
| 1 | set(SHADER_FILES | 9 | set(SHADER_FILES |
| 2 | astc_decoder.comp | 10 | astc_decoder.comp |
| 3 | block_linear_unswizzle_2d.comp | 11 | block_linear_unswizzle_2d.comp |
| @@ -5,14 +13,25 @@ set(SHADER_FILES | |||
| 5 | convert_depth_to_float.frag | 13 | convert_depth_to_float.frag |
| 6 | convert_float_to_depth.frag | 14 | convert_float_to_depth.frag |
| 7 | full_screen_triangle.vert | 15 | full_screen_triangle.vert |
| 16 | fxaa.frag | ||
| 17 | fxaa.vert | ||
| 8 | opengl_copy_bc4.comp | 18 | opengl_copy_bc4.comp |
| 9 | opengl_present.frag | 19 | opengl_present.frag |
| 10 | opengl_present.vert | 20 | opengl_present.vert |
| 21 | opengl_present_scaleforce.frag | ||
| 11 | pitch_unswizzle.comp | 22 | pitch_unswizzle.comp |
| 23 | present_bicubic.frag | ||
| 24 | present_gaussian.frag | ||
| 12 | vulkan_blit_color_float.frag | 25 | vulkan_blit_color_float.frag |
| 13 | vulkan_blit_depth_stencil.frag | 26 | vulkan_blit_depth_stencil.frag |
| 27 | vulkan_fidelityfx_fsr_easu_fp16.comp | ||
| 28 | vulkan_fidelityfx_fsr_easu_fp32.comp | ||
| 29 | vulkan_fidelityfx_fsr_rcas_fp16.comp | ||
| 30 | vulkan_fidelityfx_fsr_rcas_fp32.comp | ||
| 14 | vulkan_present.frag | 31 | vulkan_present.frag |
| 15 | vulkan_present.vert | 32 | vulkan_present.vert |
| 33 | vulkan_present_scaleforce_fp16.frag | ||
| 34 | vulkan_present_scaleforce_fp32.frag | ||
| 16 | vulkan_quad_indexed.comp | 35 | vulkan_quad_indexed.comp |
| 17 | vulkan_uint8.comp | 36 | vulkan_uint8.comp |
| 18 | ) | 37 | ) |
| @@ -76,7 +95,7 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) | |||
| 76 | OUTPUT | 95 | OUTPUT |
| 77 | ${SPIRV_HEADER_FILE} | 96 | ${SPIRV_HEADER_FILE} |
| 78 | COMMAND | 97 | COMMAND |
| 79 | ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} | 98 | ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} |
| 80 | MAIN_DEPENDENCY | 99 | MAIN_DEPENDENCY |
| 81 | ${SOURCE_FILE} | 100 | ${SOURCE_FILE} |
| 82 | ) | 101 | ) |
| @@ -84,9 +103,12 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) | |||
| 84 | endif() | 103 | endif() |
| 85 | endforeach() | 104 | endforeach() |
| 86 | 105 | ||
| 106 | set(SHADER_SOURCES ${SHADER_FILES}) | ||
| 107 | list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) | ||
| 108 | |||
| 87 | add_custom_target(host_shaders | 109 | add_custom_target(host_shaders |
| 88 | DEPENDS | 110 | DEPENDS |
| 89 | ${SHADER_HEADERS} | 111 | ${SHADER_HEADERS} |
| 90 | SOURCES | 112 | SOURCES |
| 91 | ${SHADER_FILES} | 113 | ${SHADER_SOURCES} |
| 92 | ) | 114 | ) |
diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp new file mode 100644 index 000000000..6b97f789d --- /dev/null +++ b/src/video_core/host_shaders/fidelityfx_fsr.comp | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | //!#version 460 core | ||
| 6 | #extension GL_ARB_separate_shader_objects : enable | ||
| 7 | #extension GL_ARB_shading_language_420pack : enable | ||
| 8 | #extension GL_GOOGLE_include_directive : enable | ||
| 9 | #extension GL_EXT_shader_explicit_arithmetic_types : require | ||
| 10 | |||
| 11 | // FidelityFX Super Resolution Sample | ||
| 12 | // | ||
| 13 | // Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. | ||
| 14 | // Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| 15 | // of this software and associated documentation files(the "Software"), to deal | ||
| 16 | // in the Software without restriction, including without limitation the rights | ||
| 17 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell | ||
| 18 | // copies of the Software, and to permit persons to whom the Software is | ||
| 19 | // furnished to do so, subject to the following conditions : | ||
| 20 | // The above copyright notice and this permission notice shall be included in | ||
| 21 | // all copies or substantial portions of the Software. | ||
| 22 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 23 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 24 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE | ||
| 25 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 26 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 27 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| 28 | // THE SOFTWARE. | ||
| 29 | |||
| 30 | layout( push_constant ) uniform constants { | ||
| 31 | uvec4 Const0; | ||
| 32 | uvec4 Const1; | ||
| 33 | uvec4 Const2; | ||
| 34 | uvec4 Const3; | ||
| 35 | }; | ||
| 36 | |||
| 37 | layout(set=0,binding=0) uniform sampler2D InputTexture; | ||
| 38 | layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; | ||
| 39 | |||
| 40 | #define A_GPU 1 | ||
| 41 | #define A_GLSL 1 | ||
| 42 | |||
| 43 | #ifndef YUZU_USE_FP16 | ||
| 44 | #include "ffx_a.h" | ||
| 45 | |||
| 46 | #if USE_EASU | ||
| 47 | #define FSR_EASU_F 1 | ||
| 48 | AF4 FsrEasuRF(AF2 p) { return textureGather(InputTexture, p, 0); } // EASU callback: gathers component 0 (red) at p | ||
| 49 | AF4 FsrEasuGF(AF2 p) { return textureGather(InputTexture, p, 1); } // EASU callback: gathers component 1 (green) at p | ||
| 50 | AF4 FsrEasuBF(AF2 p) { return textureGather(InputTexture, p, 2); } // EASU callback: gathers component 2 (blue) at p | ||
| 51 | #endif | ||
| 52 | #if USE_RCAS | ||
| 53 | #define FSR_RCAS_F 1 | ||
| 54 | AF4 FsrRcasLoadF(ASU2 p) { AF4 texel = texelFetch(InputTexture, p, 0); return texel; } // RCAS callback: unfiltered fetch of texel p from mip 0 | ||
| 55 | void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) { /* intentionally empty: the loaded color is used as-is */ } | ||
| 56 | #endif | ||
| 57 | #else | ||
| 58 | #define A_HALF | ||
| 59 | #include "ffx_a.h" | ||
| 60 | |||
| 61 | #if USE_EASU | ||
| 62 | #define FSR_EASU_H 1 | ||
| 63 | AH4 FsrEasuRH(AF2 p) { return AH4(textureGather(InputTexture, p, 0)); } // EASU callback: gathers component 0 (red), converted to AH4 | ||
| 64 | AH4 FsrEasuGH(AF2 p) { return AH4(textureGather(InputTexture, p, 1)); } // EASU callback: gathers component 1 (green), converted to AH4 | ||
| 65 | AH4 FsrEasuBH(AF2 p) { return AH4(textureGather(InputTexture, p, 2)); } // EASU callback: gathers component 2 (blue), converted to AH4 | ||
| 66 | #endif | ||
| 67 | #if USE_RCAS | ||
| 68 | #define FSR_RCAS_H 1 | ||
| 69 | AH4 FsrRcasLoadH(ASW2 p) { AH4 texel = AH4(texelFetch(InputTexture, ASU2(p), 0)); return texel; } // RCAS callback: fetch texel p from mip 0, converted to AH4 | ||
| 70 | void FsrRcasInputH(inout AH1 r, inout AH1 g, inout AH1 b) { /* intentionally empty: the loaded color is used as-is */ } | ||
| 71 | #endif | ||
| 72 | #endif | ||
| 73 | |||
| 74 | #include "ffx_fsr1.h" | ||
| 75 | |||
| 76 | void CurrFilter(AU2 pos) { // Filters output pixel `pos` with whichever pass was compiled in and stores it to OutputTexture. | ||
| 77 | #if USE_BILINEAR | ||
| 78 |     AF2 uv = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw); | ||
| 79 |     imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, uv, 0.0)); | ||
| 80 | #endif | ||
| 81 | #if USE_EASU | ||
| 82 | #ifdef YUZU_USE_FP16 | ||
| 83 |     AH3 rgb; | ||
| 84 |     FsrEasuH(rgb, pos, Const0, Const1, Const2, Const3); | ||
| 85 |     imageStore(OutputTexture, ASU2(pos), AH4(rgb, 1)); | ||
| 86 | #else | ||
| 87 |     AF3 rgb; | ||
| 88 |     FsrEasuF(rgb, pos, Const0, Const1, Const2, Const3); | ||
| 89 |     imageStore(OutputTexture, ASU2(pos), AF4(rgb, 1)); | ||
| 90 | #endif | ||
| 91 | #endif | ||
| 92 | #if USE_RCAS | ||
| 93 | #ifdef YUZU_USE_FP16 | ||
| 94 |     AH3 rgb; | ||
| 95 |     FsrRcasH(rgb.r, rgb.g, rgb.b, pos, Const0); | ||
| 96 |     imageStore(OutputTexture, ASU2(pos), AH4(rgb, 1)); | ||
| 97 | #else | ||
| 98 |     AF3 rgb; | ||
| 99 |     FsrRcasF(rgb.r, rgb.g, rgb.b, pos, Const0); | ||
| 100 |     imageStore(OutputTexture, ASU2(pos), AF4(rgb, 1)); | ||
| 101 | #endif | ||
| 102 | #endif | ||
| 103 | } | ||
| 104 | |||
| 105 | layout(local_size_x=64) in; | ||
| 106 | void main() { | ||
| 107 |     // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. | ||
| 108 |     AU2 tile_base = AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); | ||
| 109 |     AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + tile_base; | ||
| 110 |     // Each invocation filters four pixels of its workgroup's 16x16 tile, | ||
| 111 |     // at offsets (0,0), (8,0), (8,8) and (0,8) from gxy, in that order. | ||
| 112 |     CurrFilter(gxy); | ||
| 113 |     CurrFilter(gxy + AU2(8u, 0u)); | ||
| 114 |     CurrFilter(gxy + AU2(8u, 8u)); | ||
| 115 |     CurrFilter(gxy + AU2(0u, 8u)); | ||
| 116 | } | ||
diff --git a/src/video_core/host_shaders/fxaa.frag b/src/video_core/host_shaders/fxaa.frag new file mode 100644 index 000000000..02f4068d1 --- /dev/null +++ b/src/video_core/host_shaders/fxaa.frag | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | // Source code is adapted from | ||
| 6 | // https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/ | ||
| 7 | |||
| 8 | #version 460 | ||
| 9 | |||
| 10 | #ifdef VULKAN | ||
| 11 | |||
| 12 | #define BINDING_COLOR_TEXTURE 1 | ||
| 13 | |||
| 14 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 15 | |||
| 16 | #define BINDING_COLOR_TEXTURE 0 | ||
| 17 | |||
| 18 | #endif | ||
| 19 | |||
| 20 | layout (location = 0) in vec4 posPos; | ||
| 21 | |||
| 22 | layout (location = 0) out vec4 frag_color; | ||
| 23 | |||
| 24 | layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; | ||
| 25 | |||
| 26 | const float FXAA_SPAN_MAX = 8.0; | ||
| 27 | const float FXAA_REDUCE_MUL = 1.0 / 8.0; | ||
| 28 | const float FXAA_REDUCE_MIN = 1.0 / 128.0; | ||
| 29 | |||
| 30 | #define FxaaTexLod0(t, p) textureLod(t, p, 0.0) | ||
| 31 | #define FxaaTexOff(t, p, o) textureLodOffset(t, p, 0.0, o) | ||
| 32 | |||
| 33 | vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) { | ||
| 34 |     // posPos.xy: coordinate of the pixel itself; posPos.zw: base coordinate of the four corner taps. | ||
| 35 |     vec3 nw = FxaaTexLod0(tex, posPos.zw).xyz; | ||
| 36 |     vec3 ne = FxaaTexOff(tex, posPos.zw, ivec2(1,0)).xyz; | ||
| 37 |     vec3 sw = FxaaTexOff(tex, posPos.zw, ivec2(0,1)).xyz; | ||
| 38 |     vec3 se = FxaaTexOff(tex, posPos.zw, ivec2(1,1)).xyz; | ||
| 39 |     vec3 mid = FxaaTexLod0(tex, posPos.xy).xyz; | ||
| 40 |     // Luma of every tap. | ||
| 41 |     vec3 luma_weights = vec3(0.299, 0.587, 0.114); | ||
| 42 |     float luma_nw = dot(nw, luma_weights); | ||
| 43 |     float luma_ne = dot(ne, luma_weights); | ||
| 44 |     float luma_sw = dot(sw, luma_weights); | ||
| 45 |     float luma_se = dot(se, luma_weights); | ||
| 46 |     float luma_mid = dot(mid, luma_weights); | ||
| 47 |     // Luma range of the neighbourhood; used at the end to validate the wide blur. | ||
| 48 |     float luma_lo = min(luma_mid, min(min(luma_nw, luma_ne), min(luma_sw, luma_se))); | ||
| 49 |     float luma_hi = max(luma_mid, max(max(luma_nw, luma_ne), max(luma_sw, luma_se))); | ||
| 50 |     // Blur direction built from the luma differences between opposite pairs of corner taps. | ||
| 51 |     vec2 blur_dir; | ||
| 52 |     blur_dir.x = -((luma_nw + luma_ne) - (luma_sw + luma_se)); | ||
| 53 |     blur_dir.y = ((luma_nw + luma_sw) - (luma_ne + luma_se)); | ||
| 54 |     // Scale by 1 / (smaller component + bias), limit to +/-FXAA_SPAN_MAX, then divide by the texture size. | ||
| 55 |     float bias = max( | ||
| 56 |         (luma_nw + luma_ne + luma_sw + luma_se) * (0.25 * FXAA_REDUCE_MUL), | ||
| 57 |         FXAA_REDUCE_MIN); | ||
| 58 |     float inv_min_axis = 1.0/(min(abs(blur_dir.x), abs(blur_dir.y)) + bias); | ||
| 59 |     blur_dir = min(vec2(FXAA_SPAN_MAX), | ||
| 60 |                    max(vec2(-FXAA_SPAN_MAX), | ||
| 61 |                        blur_dir * inv_min_axis)) / textureSize(tex, 0); | ||
| 62 |     // Two-tap blur (narrow) and four-tap blur (wide) along that direction. | ||
| 63 |     vec3 narrow = (1.0 / 2.0) * ( | ||
| 64 |         FxaaTexLod0(tex, posPos.xy + blur_dir * (1.0 / 3.0 - 0.5)).xyz + | ||
| 65 |         FxaaTexLod0(tex, posPos.xy + blur_dir * (2.0 / 3.0 - 0.5)).xyz); | ||
| 66 |     vec3 wide = narrow * (1.0 / 2.0) + (1.0 / 4.0) * ( | ||
| 67 |         FxaaTexLod0(tex, posPos.xy + blur_dir * (0.0 / 3.0 - 0.5)).xyz + | ||
| 68 |         FxaaTexLod0(tex, posPos.xy + blur_dir * (3.0 / 3.0 - 0.5)).xyz); | ||
| 69 |     float luma_wide = dot(wide, luma_weights); | ||
| 70 |     // Fall back to the narrow blur when the wide one leaves the local luma range. | ||
| 71 |     return ((luma_wide < luma_lo) || (luma_wide > luma_hi)) ? narrow : wide; | ||
| 72 | } | ||
| 73 | |||
| 74 | void main() { /* Entry point: writes the FxaaPixelShader result for this fragment with alpha set to 1.0. */ | ||
| 75 | frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0); | ||
| 76 | } | ||
diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert new file mode 100644 index 000000000..ac20c04e9 --- /dev/null +++ b/src/video_core/host_shaders/fxaa.vert | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 460 | ||
| 6 | |||
| 7 | out gl_PerVertex { | ||
| 8 | vec4 gl_Position; | ||
| 9 | }; | ||
| 10 | |||
| 11 | const vec2 vertices[4] = | ||
| 12 | vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0)); | ||
| 13 | |||
| 14 | layout (location = 0) out vec4 posPos; | ||
| 15 | |||
| 16 | #ifdef VULKAN | ||
| 17 | |||
| 18 | #define BINDING_COLOR_TEXTURE 0 | ||
| 19 | #define VERTEX_ID gl_VertexIndex | ||
| 20 | |||
| 21 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 22 | |||
| 23 | #define BINDING_COLOR_TEXTURE 0 | ||
| 24 | #define VERTEX_ID gl_VertexID | ||
| 25 | |||
| 26 | #endif | ||
| 27 | |||
| 28 | layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; | ||
| 29 | |||
| 30 | const float FXAA_SUBPIX_SHIFT = 0; | ||
| 31 | |||
| 32 | void main() { | ||
| 33 |     vec2 corner = vertices[VERTEX_ID]; | ||
| 34 |     vec2 uv = (corner + 1.0) / 2.0; // clip-space [-1, 1] -> texture-space [0, 1] | ||
| 35 |     vec2 tap_shift = (0.5 + FXAA_SUBPIX_SHIFT) / textureSize(input_texture, 0); | ||
| 36 |     gl_Position = vec4(corner, 0.0, 1.0); | ||
| 37 |     posPos = vec4(uv, uv - tap_shift); // xy: unshifted coordinate, zw: coordinate moved back by tap_shift | ||
| 38 | } | ||
diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag new file mode 100644 index 000000000..71ff9e1e3 --- /dev/null +++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag | |||
| @@ -0,0 +1,130 @@ | |||
| 1 | // MIT License | ||
| 2 | // | ||
| 3 | // Copyright (c) 2020 BreadFish64 | ||
| 4 | // | ||
| 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| 6 | // of this software and associated documentation files (the "Software"), to deal | ||
| 7 | // in the Software without restriction, including without limitation the rights | ||
| 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| 9 | // copies of the Software, and to permit persons to whom the Software is | ||
| 10 | // furnished to do so, subject to the following conditions: | ||
| 11 | // | ||
| 12 | // The above copyright notice and this permission notice shall be included in all | ||
| 13 | // copies or substantial portions of the Software. | ||
| 14 | // | ||
| 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
| 21 | // SOFTWARE. | ||
| 22 | |||
| 23 | // Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce | ||
| 24 | |||
| 25 | //! #version 460 | ||
| 26 | |||
| 27 | #extension GL_ARB_separate_shader_objects : enable | ||
| 28 | |||
| 29 | #ifdef YUZU_USE_FP16 | ||
| 30 | |||
| 31 | #extension GL_AMD_gpu_shader_half_float : enable | ||
| 32 | #extension GL_NV_gpu_shader5 : enable | ||
| 33 | |||
| 34 | #define lfloat float16_t | ||
| 35 | #define lvec2 f16vec2 | ||
| 36 | #define lvec3 f16vec3 | ||
| 37 | #define lvec4 f16vec4 | ||
| 38 | |||
| 39 | #else | ||
| 40 | |||
| 41 | #define lfloat float | ||
| 42 | #define lvec2 vec2 | ||
| 43 | #define lvec3 vec3 | ||
| 44 | #define lvec4 vec4 | ||
| 45 | |||
| 46 | #endif | ||
| 47 | |||
| 48 | #ifdef VULKAN | ||
| 49 | |||
| 50 | #define BINDING_COLOR_TEXTURE 1 | ||
| 51 | |||
| 52 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 53 | |||
| 54 | #define BINDING_COLOR_TEXTURE 0 | ||
| 55 | |||
| 56 | #endif | ||
| 57 | |||
| 58 | layout (location = 0) in vec2 tex_coord; | ||
| 59 | |||
| 60 | layout (location = 0) out vec4 frag_color; | ||
| 61 | |||
| 62 | layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; | ||
| 63 | |||
| 64 | const bool ignore_alpha = true; | ||
| 65 | |||
| 66 | lfloat ColorDist1(lvec4 a, lvec4 b) { | ||
| 67 |     // BT.2020 luma coefficients: https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion | ||
| 68 |     const lvec3 luma_coeff = lvec3(0.2627, 0.6780, 0.0593); | ||
| 69 |     const lfloat cb_scale = lfloat(0.5) / (lfloat(1.0) - luma_coeff.b); | ||
| 70 |     const lfloat cr_scale = lfloat(0.5) / (lfloat(1.0) - luma_coeff.r); | ||
| 71 |     lvec4 delta = a - b; | ||
| 72 |     lfloat y = dot(delta.rgb, luma_coeff); | ||
| 73 |     lfloat cb = cb_scale * (delta.b - y); | ||
| 74 |     lfloat cr = cr_scale * (delta.r - y); | ||
| 75 |     // Euclidean length of the color difference expressed as (Y, Cb, Cr). | ||
| 76 |     lfloat dist = length(lvec3(y, cb, cr)); | ||
| 77 |     if (!ignore_alpha) { | ||
| 78 |         return sqrt(a.a * b.a * dist * dist + delta.a * delta.a); | ||
| 79 |     } | ||
| 80 |     return dist; | ||
| 81 | } | ||
| 82 | |||
| 83 | lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) { // distances from ref to A, B, C, D packed as x, y, z, w | ||
| 84 |     lvec4 dist; | ||
| 85 |     dist.x = ColorDist1(ref, A); | ||
| 86 |     dist.y = ColorDist1(ref, B); | ||
| 87 |     dist.z = ColorDist1(ref, C); | ||
| 88 |     dist.w = ColorDist1(ref, D); | ||
| 89 |     return dist; | ||
| 90 | } | ||
| 91 | |||
| 92 | vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { | ||
| 93 |     // 3x3 neighbourhood: first letter b/c/t = y offset -1/0/+1, second letter l/c/r = x offset -1/0/+1. | ||
| 94 |     lvec4 bl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); | ||
| 95 |     lvec4 bc = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1))); | ||
| 96 |     lvec4 br = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1))); | ||
| 97 |     lvec4 cl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); | ||
| 98 |     lvec4 cc = lvec4(texture(tex, tex_coord)); | ||
| 99 |     lvec4 cr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0))); | ||
| 100 |     lvec4 tl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); | ||
| 101 |     lvec4 tc = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1))); | ||
| 102 |     lvec4 tr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1))); | ||
| 103 |     lvec4 dist_upper = ColorDist(cc, tl, tc, tr, cr); | ||
| 104 |     lvec4 dist_lower = ColorDist(cc, br, bc, bl, cl); | ||
| 105 | |||
| 106 |     // Calculate how different cc is from the texels around it | ||
| 107 |     const lfloat plus_weight = lfloat(1.5); | ||
| 108 |     const lfloat cross_weight = lfloat(1.5); | ||
| 109 |     lfloat total_dist = dot(dist_upper + dist_lower, lvec4(cross_weight, plus_weight, cross_weight, plus_weight)); | ||
| 110 |     // Zero total distance: return the centre texel untouched (also avoids dividing by zero below). | ||
| 111 |     if (total_dist == lfloat(0.0)) { | ||
| 112 |         return cc; | ||
| 113 |     } | ||
| 114 |     // Add together all the distances with direction taken into account | ||
| 115 |     lvec4 signed_dist = dist_upper - dist_lower; | ||
| 116 |     lvec2 total_offset = signed_dist.wy * plus_weight + (signed_dist.zz + lvec2(-signed_dist.x, signed_dist.x)) * cross_weight; | ||
| 117 | |||
| 118 |     // When the image has thin points, they tend to split apart. | ||
| 119 |     // This is because the texels all around are different and total_offset reaches into clear areas. | ||
| 120 |     // This works pretty well to keep the offset in bounds for these cases. | ||
| 121 |     lfloat max_shift = length(total_offset) / total_dist; | ||
| 122 |     vec2 final_offset = vec2(clamp(total_offset, -max_shift, max_shift)) / textureSize(tex, 0); | ||
| 123 | |||
| 124 |     // Resample the source at the shifted coordinate. | ||
| 125 |     return texture(tex, tex_coord - final_offset); | ||
| 126 | } | ||
| 127 | |||
| 128 | void main() { /* Entry point: outputs the Scaleforce-filtered sample of input_texture at tex_coord. */ | ||
| 129 | frag_color = Scaleforce(input_texture, tex_coord); | ||
| 130 | } | ||
diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag new file mode 100644 index 000000000..902b70c2b --- /dev/null +++ b/src/video_core/host_shaders/present_bicubic.frag | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 460 core | ||
| 6 | |||
| 7 | #ifdef VULKAN | ||
| 8 | |||
| 9 | #define BINDING_COLOR_TEXTURE 1 | ||
| 10 | |||
| 11 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 12 | |||
| 13 | #define BINDING_COLOR_TEXTURE 0 | ||
| 14 | |||
| 15 | #endif | ||
| 16 | |||
| 17 | |||
| 18 | layout (location = 0) in vec2 frag_tex_coord; | ||
| 19 | |||
| 20 | layout (location = 0) out vec4 color; | ||
| 21 | |||
| 22 | layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; | ||
| 23 | |||
| 24 | vec4 cubic(float v) { // Returns four filter weights for fractional position v; the weights sum to 1. | ||
| 25 |     vec4 t = vec4(1.0, 2.0, 3.0, 4.0) - v; | ||
| 26 |     vec4 t3 = t * t * t; | ||
| 27 |     float w0 = t3.x; | ||
| 28 |     float w1 = t3.y - 4.0 * t3.x; | ||
| 29 |     float w2 = t3.z - 4.0 * t3.y + 6.0 * t3.x; | ||
| 30 |     float w3 = 6.0 - w0 - w1 - w2; // remainder, so the four values total 6 before scaling | ||
| 31 |     return vec4(w0, w1, w2, w3) * (1.0 / 6.0); | ||
| 32 | } | ||
| 33 | |||
| 34 | vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { | ||
| 35 |     // Convert the normalized coordinate to texel units, shifted by half a texel. | ||
| 36 |     vec2 size = textureSize(textureSampler, 0); | ||
| 37 |     vec2 inv_size = 1.0 / size; | ||
| 38 |     vec2 texel = texCoords * size - 0.5; | ||
| 39 | |||
| 40 |     // Split into the whole-texel base and the fractional position inside it. | ||
| 41 |     vec2 frac = fract(texel); | ||
| 42 |     vec2 base = texel - frac; | ||
| 43 | |||
| 44 |     vec4 wx = cubic(frac.x); | ||
| 45 |     vec4 wy = cubic(frac.y); | ||
| 46 | |||
| 47 |     // Merge the four weights per axis into two pairs; each pair becomes a single texture() fetch below. | ||
| 48 |     vec4 pair_base = base.xxyy + vec2(-0.5, +1.5).xyxy; | ||
| 49 |     vec4 pair_weight = vec4(wx.xz + wx.yw, wy.xz + wy.yw); | ||
| 50 |     vec4 pair_pos = pair_base + vec4(wx.yw, wy.yw) / pair_weight; | ||
| 51 |     pair_pos *= inv_size.xxyy; // back to normalized coordinates | ||
| 52 | |||
| 53 |     vec4 tap0 = texture(textureSampler, pair_pos.xz); | ||
| 54 |     vec4 tap1 = texture(textureSampler, pair_pos.yz); | ||
| 55 |     vec4 tap2 = texture(textureSampler, pair_pos.xw); | ||
| 56 |     vec4 tap3 = texture(textureSampler, pair_pos.yw); | ||
| 57 | |||
| 58 |     // Relative weight of the first pair on each axis. | ||
| 59 |     float sx = pair_weight.x / (pair_weight.x + pair_weight.y); | ||
| 60 |     float sy = pair_weight.z / (pair_weight.z + pair_weight.w); | ||
| 61 | |||
| 62 |     return mix(mix(tap3, tap2, sx), mix(tap1, tap0, sx), sy); | ||
| 63 | } | ||
| 64 | |||
| 65 | void main() { /* Entry point: outputs the RGB of the bicubic sample at frag_tex_coord with alpha set to 1.0. */ | ||
| 66 | color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); | ||
| 67 | } | ||
diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag new file mode 100644 index 000000000..66fed3238 --- /dev/null +++ b/src/video_core/host_shaders/present_gaussian.frag | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | // Code adapted from the following sources: | ||
| 6 | // - https://learnopengl.com/Advanced-Lighting/Bloom | ||
| 7 | // - https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/ | ||
| 8 | |||
| 9 | #version 460 core | ||
| 10 | |||
| 11 | #ifdef VULKAN | ||
| 12 | |||
| 13 | #define BINDING_COLOR_TEXTURE 1 | ||
| 14 | |||
| 15 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 16 | |||
| 17 | #define BINDING_COLOR_TEXTURE 0 | ||
| 18 | |||
| 19 | #endif | ||
| 20 | |||
| 21 | layout(location = 0) in vec2 frag_tex_coord; | ||
| 22 | |||
| 23 | layout(location = 0) out vec4 color; | ||
| 24 | |||
| 25 | layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; | ||
| 26 | |||
| 27 | const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308); | ||
| 28 | const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703); | ||
| 29 | |||
| 30 | vec4 blurVertical(sampler2D textureSampler, vec2 coord, vec2 norm) { // weighted taps 1..2 on both sides along Y; tap 0 is not included | ||
| 31 |     vec4 sum = vec4(0.0f); | ||
| 32 |     for (int tap = 1; tap <= 2; ++tap) { | ||
| 33 |         sum += texture(textureSampler, coord + vec2(0.0, offset[tap]) * norm) * weight[tap]; | ||
| 34 |         sum += texture(textureSampler, coord - vec2(0.0, offset[tap]) * norm) * weight[tap]; | ||
| 35 |     } | ||
| 36 |     return sum; | ||
| 37 | } | ||
| 38 | |||
| 39 | vec4 blurHorizontal(sampler2D textureSampler, vec2 coord, vec2 norm) { // weighted taps 1..2 on both sides along X; tap 0 is not included | ||
| 40 |     vec4 sum = vec4(0.0f); | ||
| 41 |     for (int tap = 1; tap <= 2; ++tap) { | ||
| 42 |         sum += texture(textureSampler, coord + vec2(offset[tap], 0.0) * norm) * weight[tap]; | ||
| 43 |         sum += texture(textureSampler, coord - vec2(offset[tap], 0.0) * norm) * weight[tap]; | ||
| 44 |     } | ||
| 45 |     return sum; | ||
| 46 | } | ||
| 47 | |||
| 48 | vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) { | ||
| 49 |     // Weighted taps 1..2 on both sides, stepped by the same offset in X and Y; the signs of norm select the diagonal. | ||
| 50 |     vec4 sum = vec4(0.0f); | ||
| 51 |     for (int tap = 1; tap <= 2; ++tap) { | ||
| 52 |         vec2 delta = vec2(offset[tap], offset[tap]) * norm; | ||
| 53 |         sum += texture(textureSampler, coord + delta) * weight[tap]; | ||
| 54 |         sum += texture(textureSampler, coord - delta) * weight[tap]; | ||
| 55 |     } | ||
| 56 |     return sum; | ||
| 57 | } | ||
| 58 | |||
| 59 | void main() { | ||
| 60 |     vec2 texel = 1.0f / textureSize(color_texture, 0); | ||
| 61 |     vec3 centre = texture(color_texture, frag_tex_coord).rgb * weight[0]; | ||
| 62 |     // TODO(Blinkhawk): This code can be optimized through shader group instructions. | ||
| 63 |     vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, texel).rgb; | ||
| 64 |     vec3 vertical = blurVertical(color_texture, frag_tex_coord, texel).rgb; | ||
| 65 |     vec3 diag_a = blurDiagonal(color_texture, frag_tex_coord, texel).rgb; | ||
| 66 |     vec3 diag_b = blurDiagonal(color_texture, frag_tex_coord, texel * vec2(1.0, -1.0)).rgb; | ||
| 67 |     vec3 axis_avg = mix(horizontal, vertical, 0.5f); | ||
| 68 |     vec3 diag_avg = mix(diag_a, diag_b, 0.5f); | ||
| 69 |     color = vec4(mix(axis_avg, diag_avg, 0.5f) + centre, 1.0f); // equal-weight average of the four blurs plus the centre tap | ||
| 70 | } | ||
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp new file mode 100644 index 000000000..1c96a7905 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 460 core | ||
| 6 | #extension GL_GOOGLE_include_directive : enable | ||
| 7 | |||
| 8 | #define YUZU_USE_FP16 | ||
| 9 | #define USE_EASU 1 | ||
| 10 | |||
| 11 | #include "fidelityfx_fsr.comp" | ||
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp new file mode 100644 index 000000000..f4daff739 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 460 core | ||
| 6 | #extension GL_GOOGLE_include_directive : enable | ||
| 7 | |||
| 8 | #define USE_EASU 1 | ||
| 9 | |||
| 10 | #include "fidelityfx_fsr.comp" | ||
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp new file mode 100644 index 000000000..6b6796dd1 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 460 core | ||
| 6 | #extension GL_GOOGLE_include_directive : enable | ||
| 7 | |||
| 8 | #define YUZU_USE_FP16 | ||
| 9 | #define USE_RCAS 1 | ||
| 10 | |||
| 11 | #include "fidelityfx_fsr.comp" | ||
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp new file mode 100644 index 000000000..f785eebf3 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 460 core | ||
| 6 | #extension GL_GOOGLE_include_directive : enable | ||
| 7 | |||
| 8 | #define USE_RCAS 1 | ||
| 9 | |||
| 10 | #include "fidelityfx_fsr.comp" | ||
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag new file mode 100644 index 000000000..924c03060 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | #version 460 | ||
| 2 | |||
| 3 | #extension GL_GOOGLE_include_directive : enable | ||
| 4 | |||
| 5 | #define YUZU_USE_FP16 | ||
| 6 | |||
| 7 | #include "opengl_present_scaleforce.frag" | ||
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag new file mode 100644 index 000000000..a594b83ca --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | #version 460 | ||
| 2 | |||
| 3 | #extension GL_GOOGLE_include_directive : enable | ||
| 4 | |||
| 5 | #include "opengl_present_scaleforce.frag" | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 187a28e4d..d4dd10bb6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <span> | 6 | #include <span> |
| 7 | 7 | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm.h" | ||
| 8 | #include "video_core/buffer_cache/buffer_cache.h" | 9 | #include "video_core/buffer_cache/buffer_cache.h" |
| 9 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 10 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 10 | #include "video_core/renderer_opengl/gl_device.h" | 11 | #include "video_core/renderer_opengl/gl_device.h" |
| @@ -229,8 +230,10 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff | |||
| 229 | .padding = 0, | 230 | .padding = 0, |
| 230 | }; | 231 | }; |
| 231 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | 232 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); |
| 232 | glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, | 233 | glProgramLocalParametersI4uivNV( |
| 233 | reinterpret_cast<const GLuint*>(&ssbo)); | 234 | PROGRAM_LUT[stage], |
| 235 | Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1, | ||
| 236 | reinterpret_cast<const GLuint*>(&ssbo)); | ||
| 234 | } | 237 | } |
| 235 | } | 238 | } |
| 236 | 239 | ||
| @@ -250,8 +253,10 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf | |||
| 250 | .padding = 0, | 253 | .padding = 0, |
| 251 | }; | 254 | }; |
| 252 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | 255 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); |
| 253 | glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, | 256 | glProgramLocalParametersI4uivNV( |
| 254 | reinterpret_cast<const GLuint*>(&ssbo)); | 257 | GL_COMPUTE_PROGRAM_NV, |
| 258 | Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1, | ||
| 259 | reinterpret_cast<const GLuint*>(&ssbo)); | ||
| 255 | } | 260 | } |
| 256 | } | 261 | } |
| 257 | 262 | ||
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index aa1cc592f..5c1f21c65 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp | |||
| @@ -19,15 +19,6 @@ using VideoCommon::ImageId; | |||
| 19 | constexpr u32 MAX_TEXTURES = 64; | 19 | constexpr u32 MAX_TEXTURES = 64; |
| 20 | constexpr u32 MAX_IMAGES = 16; | 20 | constexpr u32 MAX_IMAGES = 16; |
| 21 | 21 | ||
| 22 | template <typename Range> | ||
| 23 | u32 AccumulateCount(const Range& range) { | ||
| 24 | u32 num{}; | ||
| 25 | for (const auto& desc : range) { | ||
| 26 | num += desc.count; | ||
| 27 | } | ||
| 28 | return num; | ||
| 29 | } | ||
| 30 | |||
| 31 | size_t ComputePipelineKey::Hash() const noexcept { | 22 | size_t ComputePipelineKey::Hash() const noexcept { |
| 32 | return static_cast<size_t>( | 23 | return static_cast<size_t>( |
| 33 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); | 24 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); |
| @@ -58,17 +49,17 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac | |||
| 58 | std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), | 49 | std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), |
| 59 | uniform_buffer_sizes.begin()); | 50 | uniform_buffer_sizes.begin()); |
| 60 | 51 | ||
| 61 | num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); | 52 | num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); |
| 62 | num_image_buffers = AccumulateCount(info.image_buffer_descriptors); | 53 | num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); |
| 63 | 54 | ||
| 64 | const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; | 55 | const u32 num_textures{num_texture_buffers + Shader::NumDescriptors(info.texture_descriptors)}; |
| 65 | ASSERT(num_textures <= MAX_TEXTURES); | 56 | ASSERT(num_textures <= MAX_TEXTURES); |
| 66 | 57 | ||
| 67 | const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; | 58 | const u32 num_images{num_image_buffers + Shader::NumDescriptors(info.image_descriptors)}; |
| 68 | ASSERT(num_images <= MAX_IMAGES); | 59 | ASSERT(num_images <= MAX_IMAGES); |
| 69 | 60 | ||
| 70 | const bool is_glasm{assembly_program.handle != 0}; | 61 | const bool is_glasm{assembly_program.handle != 0}; |
| 71 | const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; | 62 | const u32 num_storage_buffers{Shader::NumDescriptors(info.storage_buffers_descriptors)}; |
| 72 | use_storage_buffers = | 63 | use_storage_buffers = |
| 73 | !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); | 64 | !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); |
| 74 | writes_global_memory = !use_storage_buffers && | 65 | writes_global_memory = !use_storage_buffers && |
| @@ -88,8 +79,7 @@ void ComputePipeline::Configure() { | |||
| 88 | } | 79 | } |
| 89 | texture_cache.SynchronizeComputeDescriptors(); | 80 | texture_cache.SynchronizeComputeDescriptors(); |
| 90 | 81 | ||
| 91 | std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; | 82 | boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; |
| 92 | boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; | ||
| 93 | std::array<GLuint, MAX_TEXTURES> samplers; | 83 | std::array<GLuint, MAX_TEXTURES> samplers; |
| 94 | std::array<GLuint, MAX_TEXTURES> textures; | 84 | std::array<GLuint, MAX_TEXTURES> textures; |
| 95 | std::array<GLuint, MAX_IMAGES> images; | 85 | std::array<GLuint, MAX_IMAGES> images; |
| @@ -119,33 +109,39 @@ void ComputePipeline::Configure() { | |||
| 119 | } | 109 | } |
| 120 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | 110 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); |
| 121 | }}; | 111 | }}; |
| 122 | const auto add_image{[&](const auto& desc) { | 112 | const auto add_image{[&](const auto& desc, bool blacklist) { |
| 123 | for (u32 index = 0; index < desc.count; ++index) { | 113 | for (u32 index = 0; index < desc.count; ++index) { |
| 124 | const auto handle{read_handle(desc, index)}; | 114 | const auto handle{read_handle(desc, index)}; |
| 125 | image_view_indices.push_back(handle.first); | 115 | views.push_back({ |
| 116 | .index = handle.first, | ||
| 117 | .blacklist = blacklist, | ||
| 118 | .id = {}, | ||
| 119 | }); | ||
| 126 | } | 120 | } |
| 127 | }}; | 121 | }}; |
| 128 | for (const auto& desc : info.texture_buffer_descriptors) { | 122 | for (const auto& desc : info.texture_buffer_descriptors) { |
| 129 | for (u32 index = 0; index < desc.count; ++index) { | 123 | for (u32 index = 0; index < desc.count; ++index) { |
| 130 | const auto handle{read_handle(desc, index)}; | 124 | const auto handle{read_handle(desc, index)}; |
| 131 | image_view_indices.push_back(handle.first); | 125 | views.push_back({handle.first}); |
| 132 | samplers[sampler_binding++] = 0; | 126 | samplers[sampler_binding++] = 0; |
| 133 | } | 127 | } |
| 134 | } | 128 | } |
| 135 | std::ranges::for_each(info.image_buffer_descriptors, add_image); | 129 | for (const auto& desc : info.image_buffer_descriptors) { |
| 130 | add_image(desc, false); | ||
| 131 | } | ||
| 136 | for (const auto& desc : info.texture_descriptors) { | 132 | for (const auto& desc : info.texture_descriptors) { |
| 137 | for (u32 index = 0; index < desc.count; ++index) { | 133 | for (u32 index = 0; index < desc.count; ++index) { |
| 138 | const auto handle{read_handle(desc, index)}; | 134 | const auto handle{read_handle(desc, index)}; |
| 139 | image_view_indices.push_back(handle.first); | 135 | views.push_back({handle.first}); |
| 140 | 136 | ||
| 141 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); | 137 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); |
| 142 | samplers[sampler_binding++] = sampler->Handle(); | 138 | samplers[sampler_binding++] = sampler->Handle(); |
| 143 | } | 139 | } |
| 144 | } | 140 | } |
| 145 | std::ranges::for_each(info.image_descriptors, add_image); | 141 | for (const auto& desc : info.image_descriptors) { |
| 146 | 142 | add_image(desc, desc.is_written); | |
| 147 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | 143 | } |
| 148 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | 144 | texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); |
| 149 | 145 | ||
| 150 | if (assembly_program.handle != 0) { | 146 | if (assembly_program.handle != 0) { |
| 151 | program_manager.BindComputeAssemblyProgram(assembly_program.handle); | 147 | program_manager.BindComputeAssemblyProgram(assembly_program.handle); |
| @@ -161,7 +157,7 @@ void ComputePipeline::Configure() { | |||
| 161 | if constexpr (is_image) { | 157 | if constexpr (is_image) { |
| 162 | is_written = desc.is_written; | 158 | is_written = desc.is_written; |
| 163 | } | 159 | } |
| 164 | ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; | 160 | ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)}; |
| 165 | buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), | 161 | buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), |
| 166 | image_view.BufferSize(), image_view.format, | 162 | image_view.BufferSize(), image_view.format, |
| 167 | is_written, is_image); | 163 | is_written, is_image); |
| @@ -177,23 +173,45 @@ void ComputePipeline::Configure() { | |||
| 177 | buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); | 173 | buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); |
| 178 | buffer_cache.BindHostComputeBuffers(); | 174 | buffer_cache.BindHostComputeBuffers(); |
| 179 | 175 | ||
| 180 | const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; | 176 | const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers + |
| 177 | num_image_buffers}; | ||
| 181 | texture_binding += num_texture_buffers; | 178 | texture_binding += num_texture_buffers; |
| 182 | image_binding += num_image_buffers; | 179 | image_binding += num_image_buffers; |
| 183 | 180 | ||
| 181 | u32 texture_scaling_mask{}; | ||
| 184 | for (const auto& desc : info.texture_descriptors) { | 182 | for (const auto& desc : info.texture_descriptors) { |
| 185 | for (u32 index = 0; index < desc.count; ++index) { | 183 | for (u32 index = 0; index < desc.count; ++index) { |
| 186 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | 184 | ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; |
| 187 | textures[texture_binding++] = image_view.Handle(desc.type); | 185 | textures[texture_binding] = image_view.Handle(desc.type); |
| 186 | if (texture_cache.IsRescaling(image_view)) { | ||
| 187 | texture_scaling_mask |= 1u << texture_binding; | ||
| 188 | } | ||
| 189 | ++texture_binding; | ||
| 188 | } | 190 | } |
| 189 | } | 191 | } |
| 192 | u32 image_scaling_mask{}; | ||
| 190 | for (const auto& desc : info.image_descriptors) { | 193 | for (const auto& desc : info.image_descriptors) { |
| 191 | for (u32 index = 0; index < desc.count; ++index) { | 194 | for (u32 index = 0; index < desc.count; ++index) { |
| 192 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | 195 | ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; |
| 193 | if (desc.is_written) { | 196 | if (desc.is_written) { |
| 194 | texture_cache.MarkModification(image_view.image_id); | 197 | texture_cache.MarkModification(image_view.image_id); |
| 195 | } | 198 | } |
| 196 | images[image_binding++] = image_view.StorageView(desc.type, desc.format); | 199 | images[image_binding] = image_view.StorageView(desc.type, desc.format); |
| 200 | if (texture_cache.IsRescaling(image_view)) { | ||
| 201 | image_scaling_mask |= 1u << image_binding; | ||
| 202 | } | ||
| 203 | ++image_binding; | ||
| 204 | } | ||
| 205 | } | ||
| 206 | if (info.uses_rescaling_uniform) { | ||
| 207 | const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)}; | ||
| 208 | const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)}; | ||
| 209 | if (assembly_program.handle != 0) { | ||
| 210 | glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_texture_scaling_mask, | ||
| 211 | float_image_scaling_mask, 0.0f, 0.0f); | ||
| 212 | } else { | ||
| 213 | glProgramUniform4f(source_program.handle, 0, float_texture_scaling_mask, | ||
| 214 | float_image_scaling_mask, 0.0f, 0.0f); | ||
| 197 | } | 215 | } |
| 198 | } | 216 | } |
| 199 | if (texture_binding != 0) { | 217 | if (texture_binding != 0) { |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index bccb37a58..f8495896c 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | |||
| @@ -15,7 +15,7 @@ | |||
| 15 | #include "video_core/renderer_opengl/gl_shader_util.h" | 15 | #include "video_core/renderer_opengl/gl_shader_util.h" |
| 16 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 16 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 17 | #include "video_core/shader_notify.h" | 17 | #include "video_core/shader_notify.h" |
| 18 | #include "video_core/texture_cache/texture_cache_base.h" | 18 | #include "video_core/texture_cache/texture_cache.h" |
| 19 | 19 | ||
| 20 | #if defined(_MSC_VER) && defined(NDEBUG) | 20 | #if defined(_MSC_VER) && defined(NDEBUG) |
| 21 | #define LAMBDA_FORCEINLINE [[msvc::forceinline]] | 21 | #define LAMBDA_FORCEINLINE [[msvc::forceinline]] |
| @@ -27,6 +27,7 @@ namespace OpenGL { | |||
| 27 | namespace { | 27 | namespace { |
| 28 | using Shader::ImageBufferDescriptor; | 28 | using Shader::ImageBufferDescriptor; |
| 29 | using Shader::ImageDescriptor; | 29 | using Shader::ImageDescriptor; |
| 30 | using Shader::NumDescriptors; | ||
| 30 | using Shader::TextureBufferDescriptor; | 31 | using Shader::TextureBufferDescriptor; |
| 31 | using Shader::TextureDescriptor; | 32 | using Shader::TextureDescriptor; |
| 32 | using Tegra::Texture::TexturePair; | 33 | using Tegra::Texture::TexturePair; |
| @@ -35,15 +36,6 @@ using VideoCommon::ImageId; | |||
| 35 | constexpr u32 MAX_TEXTURES = 64; | 36 | constexpr u32 MAX_TEXTURES = 64; |
| 36 | constexpr u32 MAX_IMAGES = 8; | 37 | constexpr u32 MAX_IMAGES = 8; |
| 37 | 38 | ||
| 38 | template <typename Range> | ||
| 39 | u32 AccumulateCount(const Range& range) { | ||
| 40 | u32 num{}; | ||
| 41 | for (const auto& desc : range) { | ||
| 42 | num += desc.count; | ||
| 43 | } | ||
| 44 | return num; | ||
| 45 | } | ||
| 46 | |||
| 47 | GLenum Stage(size_t stage_index) { | 39 | GLenum Stage(size_t stage_index) { |
| 48 | switch (stage_index) { | 40 | switch (stage_index) { |
| 49 | case 0: | 41 | case 0: |
| @@ -204,23 +196,23 @@ GraphicsPipeline::GraphicsPipeline( | |||
| 204 | base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; | 196 | base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; |
| 205 | base_storage_bindings[stage + 1] = base_storage_bindings[stage]; | 197 | base_storage_bindings[stage + 1] = base_storage_bindings[stage]; |
| 206 | 198 | ||
| 207 | base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); | 199 | base_uniform_bindings[stage + 1] += NumDescriptors(info.constant_buffer_descriptors); |
| 208 | base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); | 200 | base_storage_bindings[stage + 1] += NumDescriptors(info.storage_buffers_descriptors); |
| 209 | } | 201 | } |
| 210 | enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; | 202 | enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; |
| 211 | std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); | 203 | std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); |
| 212 | 204 | ||
| 213 | const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; | 205 | const u32 num_tex_buffer_bindings{NumDescriptors(info.texture_buffer_descriptors)}; |
| 214 | num_texture_buffers[stage] += num_tex_buffer_bindings; | 206 | num_texture_buffers[stage] += num_tex_buffer_bindings; |
| 215 | num_textures += num_tex_buffer_bindings; | 207 | num_textures += num_tex_buffer_bindings; |
| 216 | 208 | ||
| 217 | const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; | 209 | const u32 num_img_buffers_bindings{NumDescriptors(info.image_buffer_descriptors)}; |
| 218 | num_image_buffers[stage] += num_img_buffers_bindings; | 210 | num_image_buffers[stage] += num_img_buffers_bindings; |
| 219 | num_images += num_img_buffers_bindings; | 211 | num_images += num_img_buffers_bindings; |
| 220 | 212 | ||
| 221 | num_textures += AccumulateCount(info.texture_descriptors); | 213 | num_textures += NumDescriptors(info.texture_descriptors); |
| 222 | num_images += AccumulateCount(info.image_descriptors); | 214 | num_images += NumDescriptors(info.image_descriptors); |
| 223 | num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); | 215 | num_storage_buffers += NumDescriptors(info.storage_buffers_descriptors); |
| 224 | 216 | ||
| 225 | writes_global_memory |= std::ranges::any_of( | 217 | writes_global_memory |= std::ranges::any_of( |
| 226 | info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); | 218 | info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); |
| @@ -288,10 +280,9 @@ GraphicsPipeline::GraphicsPipeline( | |||
| 288 | 280 | ||
| 289 | template <typename Spec> | 281 | template <typename Spec> |
| 290 | void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | 282 | void GraphicsPipeline::ConfigureImpl(bool is_indexed) { |
| 291 | std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; | 283 | std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; |
| 292 | std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; | ||
| 293 | std::array<GLuint, MAX_TEXTURES> samplers; | 284 | std::array<GLuint, MAX_TEXTURES> samplers; |
| 294 | size_t image_view_index{}; | 285 | size_t views_index{}; |
| 295 | GLsizei sampler_binding{}; | 286 | GLsizei sampler_binding{}; |
| 296 | 287 | ||
| 297 | texture_cache.SynchronizeGraphicsDescriptors(); | 288 | texture_cache.SynchronizeGraphicsDescriptors(); |
| @@ -336,30 +327,34 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 336 | } | 327 | } |
| 337 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | 328 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); |
| 338 | }}; | 329 | }}; |
| 339 | const auto add_image{[&](const auto& desc) { | 330 | const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { |
| 340 | for (u32 index = 0; index < desc.count; ++index) { | 331 | for (u32 index = 0; index < desc.count; ++index) { |
| 341 | const auto handle{read_handle(desc, index)}; | 332 | const auto handle{read_handle(desc, index)}; |
| 342 | image_view_indices[image_view_index++] = handle.first; | 333 | views[views_index++] = { |
| 334 | .index = handle.first, | ||
| 335 | .blacklist = blacklist, | ||
| 336 | .id = {}, | ||
| 337 | }; | ||
| 343 | } | 338 | } |
| 344 | }}; | 339 | }}; |
| 345 | if constexpr (Spec::has_texture_buffers) { | 340 | if constexpr (Spec::has_texture_buffers) { |
| 346 | for (const auto& desc : info.texture_buffer_descriptors) { | 341 | for (const auto& desc : info.texture_buffer_descriptors) { |
| 347 | for (u32 index = 0; index < desc.count; ++index) { | 342 | for (u32 index = 0; index < desc.count; ++index) { |
| 348 | const auto handle{read_handle(desc, index)}; | 343 | const auto handle{read_handle(desc, index)}; |
| 349 | image_view_indices[image_view_index++] = handle.first; | 344 | views[views_index++] = {handle.first}; |
| 350 | samplers[sampler_binding++] = 0; | 345 | samplers[sampler_binding++] = 0; |
| 351 | } | 346 | } |
| 352 | } | 347 | } |
| 353 | } | 348 | } |
| 354 | if constexpr (Spec::has_image_buffers) { | 349 | if constexpr (Spec::has_image_buffers) { |
| 355 | for (const auto& desc : info.image_buffer_descriptors) { | 350 | for (const auto& desc : info.image_buffer_descriptors) { |
| 356 | add_image(desc); | 351 | add_image(desc, false); |
| 357 | } | 352 | } |
| 358 | } | 353 | } |
| 359 | for (const auto& desc : info.texture_descriptors) { | 354 | for (const auto& desc : info.texture_descriptors) { |
| 360 | for (u32 index = 0; index < desc.count; ++index) { | 355 | for (u32 index = 0; index < desc.count; ++index) { |
| 361 | const auto handle{read_handle(desc, index)}; | 356 | const auto handle{read_handle(desc, index)}; |
| 362 | image_view_indices[image_view_index++] = handle.first; | 357 | views[views_index++] = {handle.first}; |
| 363 | 358 | ||
| 364 | Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; | 359 | Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; |
| 365 | samplers[sampler_binding++] = sampler->Handle(); | 360 | samplers[sampler_binding++] = sampler->Handle(); |
| @@ -367,7 +362,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 367 | } | 362 | } |
| 368 | if constexpr (Spec::has_images) { | 363 | if constexpr (Spec::has_images) { |
| 369 | for (const auto& desc : info.image_descriptors) { | 364 | for (const auto& desc : info.image_descriptors) { |
| 370 | add_image(desc); | 365 | add_image(desc, desc.is_written); |
| 371 | } | 366 | } |
| 372 | } | 367 | } |
| 373 | }}; | 368 | }}; |
| @@ -386,13 +381,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 386 | if constexpr (Spec::enabled_stages[4]) { | 381 | if constexpr (Spec::enabled_stages[4]) { |
| 387 | config_stage(4); | 382 | config_stage(4); |
| 388 | } | 383 | } |
| 389 | const std::span indices_span(image_view_indices.data(), image_view_index); | 384 | texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views_index)); |
| 390 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 391 | 385 | ||
| 392 | texture_cache.UpdateRenderTargets(false); | 386 | texture_cache.UpdateRenderTargets(false); |
| 393 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | 387 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); |
| 394 | 388 | ||
| 395 | ImageId* texture_buffer_index{image_view_ids.data()}; | 389 | VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; |
| 396 | const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { | 390 | const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { |
| 397 | size_t index{}; | 391 | size_t index{}; |
| 398 | const auto add_buffer{[&](const auto& desc) { | 392 | const auto add_buffer{[&](const auto& desc) { |
| @@ -402,12 +396,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 402 | if constexpr (is_image) { | 396 | if constexpr (is_image) { |
| 403 | is_written = desc.is_written; | 397 | is_written = desc.is_written; |
| 404 | } | 398 | } |
| 405 | ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; | 399 | ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)}; |
| 406 | buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), | 400 | buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), |
| 407 | image_view.BufferSize(), image_view.format, | 401 | image_view.BufferSize(), image_view.format, |
| 408 | is_written, is_image); | 402 | is_written, is_image); |
| 409 | ++index; | 403 | ++index; |
| 410 | ++texture_buffer_index; | 404 | ++texture_buffer_it; |
| 411 | } | 405 | } |
| 412 | }}; | 406 | }}; |
| 413 | const Shader::Info& info{stage_infos[stage]}; | 407 | const Shader::Info& info{stage_infos[stage]}; |
| @@ -423,13 +417,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 423 | add_buffer(desc); | 417 | add_buffer(desc); |
| 424 | } | 418 | } |
| 425 | } | 419 | } |
| 426 | for (const auto& desc : info.texture_descriptors) { | 420 | texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors); |
| 427 | texture_buffer_index += desc.count; | ||
| 428 | } | ||
| 429 | if constexpr (Spec::has_images) { | 421 | if constexpr (Spec::has_images) { |
| 430 | for (const auto& desc : info.image_descriptors) { | 422 | texture_buffer_it += Shader::NumDescriptors(info.image_descriptors); |
| 431 | texture_buffer_index += desc.count; | ||
| 432 | } | ||
| 433 | } | 423 | } |
| 434 | }}; | 424 | }}; |
| 435 | if constexpr (Spec::enabled_stages[0]) { | 425 | if constexpr (Spec::enabled_stages[0]) { |
| @@ -453,12 +443,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 453 | if (!is_built.load(std::memory_order::relaxed)) { | 443 | if (!is_built.load(std::memory_order::relaxed)) { |
| 454 | WaitForBuild(); | 444 | WaitForBuild(); |
| 455 | } | 445 | } |
| 456 | if (assembly_programs[0].handle != 0) { | 446 | const bool use_assembly{assembly_programs[0].handle != 0}; |
| 447 | if (use_assembly) { | ||
| 457 | program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); | 448 | program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); |
| 458 | } else { | 449 | } else { |
| 459 | program_manager.BindSourcePrograms(source_programs); | 450 | program_manager.BindSourcePrograms(source_programs); |
| 460 | } | 451 | } |
| 461 | const ImageId* views_it{image_view_ids.data()}; | 452 | const VideoCommon::ImageViewInOut* views_it{views.data()}; |
| 462 | GLsizei texture_binding = 0; | 453 | GLsizei texture_binding = 0; |
| 463 | GLsizei image_binding = 0; | 454 | GLsizei image_binding = 0; |
| 464 | std::array<GLuint, MAX_TEXTURES> textures; | 455 | std::array<GLuint, MAX_TEXTURES> textures; |
| @@ -473,20 +464,49 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 473 | views_it += num_texture_buffers[stage]; | 464 | views_it += num_texture_buffers[stage]; |
| 474 | views_it += num_image_buffers[stage]; | 465 | views_it += num_image_buffers[stage]; |
| 475 | 466 | ||
| 467 | u32 texture_scaling_mask{}; | ||
| 468 | u32 image_scaling_mask{}; | ||
| 469 | u32 stage_texture_binding{}; | ||
| 470 | u32 stage_image_binding{}; | ||
| 471 | |||
| 476 | const auto& info{stage_infos[stage]}; | 472 | const auto& info{stage_infos[stage]}; |
| 477 | for (const auto& desc : info.texture_descriptors) { | 473 | for (const auto& desc : info.texture_descriptors) { |
| 478 | for (u32 index = 0; index < desc.count; ++index) { | 474 | for (u32 index = 0; index < desc.count; ++index) { |
| 479 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | 475 | ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; |
| 480 | textures[texture_binding++] = image_view.Handle(desc.type); | 476 | textures[texture_binding] = image_view.Handle(desc.type); |
| 477 | if (texture_cache.IsRescaling(image_view)) { | ||
| 478 | texture_scaling_mask |= 1u << stage_texture_binding; | ||
| 479 | } | ||
| 480 | ++texture_binding; | ||
| 481 | ++stage_texture_binding; | ||
| 481 | } | 482 | } |
| 482 | } | 483 | } |
| 483 | for (const auto& desc : info.image_descriptors) { | 484 | for (const auto& desc : info.image_descriptors) { |
| 484 | for (u32 index = 0; index < desc.count; ++index) { | 485 | for (u32 index = 0; index < desc.count; ++index) { |
| 485 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | 486 | ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; |
| 486 | if (desc.is_written) { | 487 | if (desc.is_written) { |
| 487 | texture_cache.MarkModification(image_view.image_id); | 488 | texture_cache.MarkModification(image_view.image_id); |
| 488 | } | 489 | } |
| 489 | images[image_binding++] = image_view.StorageView(desc.type, desc.format); | 490 | images[image_binding] = image_view.StorageView(desc.type, desc.format); |
| 491 | if (texture_cache.IsRescaling(image_view)) { | ||
| 492 | image_scaling_mask |= 1u << stage_image_binding; | ||
| 493 | } | ||
| 494 | ++image_binding; | ||
| 495 | ++stage_image_binding; | ||
| 496 | } | ||
| 497 | } | ||
| 498 | if (info.uses_rescaling_uniform) { | ||
| 499 | const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)}; | ||
| 500 | const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)}; | ||
| 501 | const bool is_rescaling{texture_cache.IsRescaling()}; | ||
| 502 | const f32 config_down_factor{Settings::values.resolution_info.down_factor}; | ||
| 503 | const f32 down_factor{is_rescaling ? config_down_factor : 1.0f}; | ||
| 504 | if (use_assembly) { | ||
| 505 | glProgramLocalParameter4fARB(AssemblyStage(stage), 0, float_texture_scaling_mask, | ||
| 506 | float_image_scaling_mask, down_factor, 0.0f); | ||
| 507 | } else { | ||
| 508 | glProgramUniform4f(source_programs[stage].handle, 0, float_texture_scaling_mask, | ||
| 509 | float_image_scaling_mask, down_factor, 0.0f); | ||
| 490 | } | 510 | } |
| 491 | } | 511 | } |
| 492 | }}; | 512 | }}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a6d9f7c43..9b516c64f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -184,6 +184,10 @@ void RasterizerOpenGL::Clear() { | |||
| 184 | SyncRasterizeEnable(); | 184 | SyncRasterizeEnable(); |
| 185 | SyncStencilTestState(); | 185 | SyncStencilTestState(); |
| 186 | 186 | ||
| 187 | std::scoped_lock lock{texture_cache.mutex}; | ||
| 188 | texture_cache.UpdateRenderTargets(true); | ||
| 189 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||
| 190 | SyncViewport(); | ||
| 187 | if (regs.clear_flags.scissor) { | 191 | if (regs.clear_flags.scissor) { |
| 188 | SyncScissorTest(); | 192 | SyncScissorTest(); |
| 189 | } else { | 193 | } else { |
| @@ -192,10 +196,6 @@ void RasterizerOpenGL::Clear() { | |||
| 192 | } | 196 | } |
| 193 | UNIMPLEMENTED_IF(regs.clear_flags.viewport); | 197 | UNIMPLEMENTED_IF(regs.clear_flags.viewport); |
| 194 | 198 | ||
| 195 | std::scoped_lock lock{texture_cache.mutex}; | ||
| 196 | texture_cache.UpdateRenderTargets(true); | ||
| 197 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||
| 198 | |||
| 199 | if (use_color) { | 199 | if (use_color) { |
| 200 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | 200 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); |
| 201 | } | 201 | } |
| @@ -214,8 +214,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 214 | 214 | ||
| 215 | query_cache.UpdateCounters(); | 215 | query_cache.UpdateCounters(); |
| 216 | 216 | ||
| 217 | SyncState(); | ||
| 218 | |||
| 219 | GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; | 217 | GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; |
| 220 | if (!pipeline) { | 218 | if (!pipeline) { |
| 221 | return; | 219 | return; |
| @@ -223,6 +221,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 223 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | 221 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 224 | pipeline->Configure(is_indexed); | 222 | pipeline->Configure(is_indexed); |
| 225 | 223 | ||
| 224 | SyncState(); | ||
| 225 | |||
| 226 | const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); | 226 | const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); |
| 227 | BeginTransformFeedback(pipeline, primitive_mode); | 227 | BeginTransformFeedback(pipeline, primitive_mode); |
| 228 | 228 | ||
| @@ -533,7 +533,8 @@ void RasterizerOpenGL::SyncViewport() { | |||
| 533 | auto& flags = maxwell3d.dirty.flags; | 533 | auto& flags = maxwell3d.dirty.flags; |
| 534 | const auto& regs = maxwell3d.regs; | 534 | const auto& regs = maxwell3d.regs; |
| 535 | 535 | ||
| 536 | const bool dirty_viewport = flags[Dirty::Viewports]; | 536 | const bool rescale_viewports = flags[VideoCommon::Dirty::RescaleViewports]; |
| 537 | const bool dirty_viewport = flags[Dirty::Viewports] || rescale_viewports; | ||
| 537 | const bool dirty_clip_control = flags[Dirty::ClipControl]; | 538 | const bool dirty_clip_control = flags[Dirty::ClipControl]; |
| 538 | 539 | ||
| 539 | if (dirty_clip_control || flags[Dirty::FrontFace]) { | 540 | if (dirty_clip_control || flags[Dirty::FrontFace]) { |
| @@ -553,8 +554,7 @@ void RasterizerOpenGL::SyncViewport() { | |||
| 553 | } | 554 | } |
| 554 | glFrontFace(mode); | 555 | glFrontFace(mode); |
| 555 | } | 556 | } |
| 556 | 557 | if (dirty_viewport || dirty_clip_control) { | |
| 557 | if (dirty_viewport || flags[Dirty::ClipControl]) { | ||
| 558 | flags[Dirty::ClipControl] = false; | 558 | flags[Dirty::ClipControl] = false; |
| 559 | 559 | ||
| 560 | bool flip_y = false; | 560 | bool flip_y = false; |
| @@ -570,37 +570,58 @@ void RasterizerOpenGL::SyncViewport() { | |||
| 570 | state_tracker.ClipControl(origin, depth); | 570 | state_tracker.ClipControl(origin, depth); |
| 571 | state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0); | 571 | state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0); |
| 572 | } | 572 | } |
| 573 | const bool is_rescaling{texture_cache.IsRescaling()}; | ||
| 574 | const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; | ||
| 575 | const auto conv = [scale](float value) -> GLfloat { | ||
| 576 | float new_value = value * scale; | ||
| 577 | if (scale < 1.0f) { | ||
| 578 | const bool sign = std::signbit(value); | ||
| 579 | new_value = std::round(std::abs(new_value)); | ||
| 580 | new_value = sign ? -new_value : new_value; | ||
| 581 | } | ||
| 582 | return static_cast<GLfloat>(new_value); | ||
| 583 | }; | ||
| 573 | 584 | ||
| 574 | if (dirty_viewport) { | 585 | if (dirty_viewport) { |
| 575 | flags[Dirty::Viewports] = false; | 586 | flags[Dirty::Viewports] = false; |
| 576 | 587 | ||
| 577 | const bool force = flags[Dirty::ViewportTransform]; | 588 | const bool force = flags[Dirty::ViewportTransform] || rescale_viewports; |
| 578 | flags[Dirty::ViewportTransform] = false; | 589 | flags[Dirty::ViewportTransform] = false; |
| 590 | flags[VideoCommon::Dirty::RescaleViewports] = false; | ||
| 579 | 591 | ||
| 580 | for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) { | 592 | for (size_t index = 0; index < Maxwell::NumViewports; ++index) { |
| 581 | if (!force && !flags[Dirty::Viewport0 + i]) { | 593 | if (!force && !flags[Dirty::Viewport0 + index]) { |
| 582 | continue; | 594 | continue; |
| 583 | } | 595 | } |
| 584 | flags[Dirty::Viewport0 + i] = false; | 596 | flags[Dirty::Viewport0 + index] = false; |
| 585 | 597 | ||
| 586 | const auto& src = regs.viewport_transform[i]; | 598 | const auto& src = regs.viewport_transform[index]; |
| 587 | const Common::Rectangle<f32> rect{src.GetRect()}; | 599 | GLfloat x = conv(src.translate_x - src.scale_x); |
| 588 | glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(), | 600 | GLfloat y = conv(src.translate_y - src.scale_y); |
| 589 | rect.GetHeight()); | 601 | GLfloat width = conv(src.scale_x * 2.0f); |
| 602 | GLfloat height = conv(src.scale_y * 2.0f); | ||
| 603 | |||
| 604 | if (height < 0) { | ||
| 605 | y += height; | ||
| 606 | height = -height; | ||
| 607 | } | ||
| 608 | glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f, | ||
| 609 | height != 0.0f ? height : 1.0f); | ||
| 590 | 610 | ||
| 591 | const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; | 611 | const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; |
| 592 | const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; | 612 | const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; |
| 593 | const GLdouble far_depth = src.translate_z + src.scale_z; | 613 | const GLdouble far_depth = src.translate_z + src.scale_z; |
| 594 | if (device.HasDepthBufferFloat()) { | 614 | if (device.HasDepthBufferFloat()) { |
| 595 | glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth); | 615 | glDepthRangeIndexeddNV(static_cast<GLuint>(index), near_depth, far_depth); |
| 596 | } else { | 616 | } else { |
| 597 | glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); | 617 | glDepthRangeIndexed(static_cast<GLuint>(index), near_depth, far_depth); |
| 598 | } | 618 | } |
| 599 | 619 | ||
| 600 | if (!GLAD_GL_NV_viewport_swizzle) { | 620 | if (!GLAD_GL_NV_viewport_swizzle) { |
| 601 | continue; | 621 | continue; |
| 602 | } | 622 | } |
| 603 | glViewportSwizzleNV(static_cast<GLuint>(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x), | 623 | glViewportSwizzleNV(static_cast<GLuint>(index), |
| 624 | MaxwellToGL::ViewportSwizzle(src.swizzle.x), | ||
| 604 | MaxwellToGL::ViewportSwizzle(src.swizzle.y), | 625 | MaxwellToGL::ViewportSwizzle(src.swizzle.y), |
| 605 | MaxwellToGL::ViewportSwizzle(src.swizzle.z), | 626 | MaxwellToGL::ViewportSwizzle(src.swizzle.z), |
| 606 | MaxwellToGL::ViewportSwizzle(src.swizzle.w)); | 627 | MaxwellToGL::ViewportSwizzle(src.swizzle.w)); |
| @@ -903,14 +924,34 @@ void RasterizerOpenGL::SyncLogicOpState() { | |||
| 903 | 924 | ||
| 904 | void RasterizerOpenGL::SyncScissorTest() { | 925 | void RasterizerOpenGL::SyncScissorTest() { |
| 905 | auto& flags = maxwell3d.dirty.flags; | 926 | auto& flags = maxwell3d.dirty.flags; |
| 906 | if (!flags[Dirty::Scissors]) { | 927 | if (!flags[Dirty::Scissors] && !flags[VideoCommon::Dirty::RescaleScissors]) { |
| 907 | return; | 928 | return; |
| 908 | } | 929 | } |
| 909 | flags[Dirty::Scissors] = false; | 930 | flags[Dirty::Scissors] = false; |
| 910 | 931 | ||
| 932 | const bool force = flags[VideoCommon::Dirty::RescaleScissors]; | ||
| 933 | flags[VideoCommon::Dirty::RescaleScissors] = false; | ||
| 934 | |||
| 911 | const auto& regs = maxwell3d.regs; | 935 | const auto& regs = maxwell3d.regs; |
| 936 | |||
| 937 | const auto& resolution = Settings::values.resolution_info; | ||
| 938 | const bool is_rescaling{texture_cache.IsRescaling()}; | ||
| 939 | const u32 up_scale = is_rescaling ? resolution.up_scale : 1U; | ||
| 940 | const u32 down_shift = is_rescaling ? resolution.down_shift : 0U; | ||
| 941 | const auto scale_up = [up_scale, down_shift](u32 value) -> u32 { | ||
| 942 | if (value == 0) { | ||
| 943 | return 0U; | ||
| 944 | } | ||
| 945 | const u32 upset = value * up_scale; | ||
| 946 | u32 acumm{}; | ||
| 947 | if ((up_scale >> down_shift) == 0) { | ||
| 948 | acumm = upset % 2; | ||
| 949 | } | ||
| 950 | const u32 converted_value = upset >> down_shift; | ||
| 951 | return std::max<u32>(converted_value + acumm, 1U); | ||
| 952 | }; | ||
| 912 | for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { | 953 | for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { |
| 913 | if (!flags[Dirty::Scissor0 + index]) { | 954 | if (!force && !flags[Dirty::Scissor0 + index]) { |
| 914 | continue; | 955 | continue; |
| 915 | } | 956 | } |
| 916 | flags[Dirty::Scissor0 + index] = false; | 957 | flags[Dirty::Scissor0 + index] = false; |
| @@ -918,8 +959,8 @@ void RasterizerOpenGL::SyncScissorTest() { | |||
| 918 | const auto& src = regs.scissor_test[index]; | 959 | const auto& src = regs.scissor_test[index]; |
| 919 | if (src.enable) { | 960 | if (src.enable) { |
| 920 | glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); | 961 | glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); |
| 921 | glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y, | 962 | glScissorIndexed(static_cast<GLuint>(index), scale_up(src.min_x), scale_up(src.min_y), |
| 922 | src.max_x - src.min_x, src.max_y - src.min_y); | 963 | scale_up(src.max_x - src.min_x), scale_up(src.max_y - src.min_y)); |
| 923 | } else { | 964 | } else { |
| 924 | glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); | 965 | glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); |
| 925 | } | 966 | } |
| @@ -935,8 +976,9 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 935 | 976 | ||
| 936 | oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); | 977 | oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); |
| 937 | oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); | 978 | oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); |
| 938 | 979 | const bool is_rescaling{texture_cache.IsRescaling()}; | |
| 939 | glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); | 980 | const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; |
| 981 | glPointSize(std::max(1.0f, maxwell3d.regs.point_size * scale)); | ||
| 940 | } | 982 | } |
| 941 | 983 | ||
| 942 | void RasterizerOpenGL::SyncLineState() { | 984 | void RasterizerOpenGL::SyncLineState() { |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 8695c29e3..5e7101d28 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -166,7 +166,12 @@ void OGLFramebuffer::Create() { | |||
| 166 | return; | 166 | return; |
| 167 | 167 | ||
| 168 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); | 168 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); |
| 169 | // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of | ||
| 170 | // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared | ||
| 171 | // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with | ||
| 172 | // mismatching size, this is why core framebuffers are preferred. | ||
| 169 | glGenFramebuffers(1, &handle); | 173 | glGenFramebuffers(1, &handle); |
| 174 | glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); | ||
| 170 | } | 175 | } |
| 171 | 176 | ||
| 172 | void OGLFramebuffer::Release() { | 177 | void OGLFramebuffer::Release() { |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 02682bd76..42ef67628 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -426,16 +426,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
| 426 | // Normal path | 426 | // Normal path |
| 427 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); | 427 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); |
| 428 | 428 | ||
| 429 | for (const auto& desc : programs[index].info.storage_buffers_descriptors) { | 429 | total_storage_buffers += |
| 430 | total_storage_buffers += desc.count; | 430 | Shader::NumDescriptors(programs[index].info.storage_buffers_descriptors); |
| 431 | } | ||
| 432 | } else { | 431 | } else { |
| 433 | // VertexB path when VertexA is present. | 432 | // VertexB path when VertexA is present. |
| 434 | auto& program_va{programs[0]}; | 433 | auto& program_va{programs[0]}; |
| 435 | auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; | 434 | auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
| 436 | for (const auto& desc : program_vb.info.storage_buffers_descriptors) { | 435 | total_storage_buffers += |
| 437 | total_storage_buffers += desc.count; | 436 | Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors); |
| 438 | } | ||
| 439 | programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); | 437 | programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); |
| 440 | } | 438 | } |
| 441 | } | 439 | } |
| @@ -510,10 +508,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( | |||
| 510 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | 508 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; |
| 511 | auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; | 509 | auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
| 512 | 510 | ||
| 513 | u32 num_storage_buffers{}; | 511 | const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)}; |
| 514 | for (const auto& desc : program.info.storage_buffers_descriptors) { | ||
| 515 | num_storage_buffers += desc.count; | ||
| 516 | } | ||
| 517 | Shader::RuntimeInfo info; | 512 | Shader::RuntimeInfo info; |
| 518 | info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); | 513 | info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); |
| 519 | 514 | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 8c3ca3d82..2f7d98d8b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -9,8 +9,8 @@ | |||
| 9 | 9 | ||
| 10 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 11 | 11 | ||
| 12 | #include "common/literals.h" | ||
| 12 | #include "common/settings.h" | 13 | #include "common/settings.h" |
| 13 | |||
| 14 | #include "video_core/renderer_opengl/gl_device.h" | 14 | #include "video_core/renderer_opengl/gl_device.h" |
| 15 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 15 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 16 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 16 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| @@ -42,6 +42,7 @@ using VideoCore::Surface::IsPixelFormatSRGB; | |||
| 42 | using VideoCore::Surface::MaxPixelFormat; | 42 | using VideoCore::Surface::MaxPixelFormat; |
| 43 | using VideoCore::Surface::PixelFormat; | 43 | using VideoCore::Surface::PixelFormat; |
| 44 | using VideoCore::Surface::SurfaceType; | 44 | using VideoCore::Surface::SurfaceType; |
| 45 | using namespace Common::Literals; | ||
| 45 | 46 | ||
| 46 | struct CopyOrigin { | 47 | struct CopyOrigin { |
| 47 | GLint level; | 48 | GLint level; |
| @@ -316,6 +317,52 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { | |||
| 316 | } | 317 | } |
| 317 | } | 318 | } |
| 318 | 319 | ||
| 320 | OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) { | ||
| 321 | const GLenum target = ImageTarget(info); | ||
| 322 | const GLsizei width = info.size.width; | ||
| 323 | const GLsizei height = info.size.height; | ||
| 324 | const GLsizei depth = info.size.depth; | ||
| 325 | const int max_host_mip_levels = std::bit_width(info.size.width); | ||
| 326 | const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); | ||
| 327 | const GLsizei num_layers = info.resources.layers; | ||
| 328 | const GLsizei num_samples = info.num_samples; | ||
| 329 | |||
| 330 | GLuint handle = 0; | ||
| 331 | OGLTexture texture; | ||
| 332 | if (target != GL_TEXTURE_BUFFER) { | ||
| 333 | texture.Create(target); | ||
| 334 | handle = texture.handle; | ||
| 335 | } | ||
| 336 | switch (target) { | ||
| 337 | case GL_TEXTURE_1D_ARRAY: | ||
| 338 | glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); | ||
| 339 | break; | ||
| 340 | case GL_TEXTURE_2D_ARRAY: | ||
| 341 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); | ||
| 342 | break; | ||
| 343 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { | ||
| 344 | // TODO: Where should 'fixedsamplelocations' come from? | ||
| 345 | const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); | ||
| 346 | glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x, | ||
| 347 | height >> samples_y, num_layers, GL_FALSE); | ||
| 348 | break; | ||
| 349 | } | ||
| 350 | case GL_TEXTURE_RECTANGLE: | ||
| 351 | glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); | ||
| 352 | break; | ||
| 353 | case GL_TEXTURE_3D: | ||
| 354 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); | ||
| 355 | break; | ||
| 356 | case GL_TEXTURE_BUFFER: | ||
| 357 | UNREACHABLE(); | ||
| 358 | break; | ||
| 359 | default: | ||
| 360 | UNREACHABLE_MSG("Invalid target=0x{:x}", target); | ||
| 361 | break; | ||
| 362 | } | ||
| 363 | return texture; | ||
| 364 | } | ||
| 365 | |||
| 319 | [[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) { | 366 | [[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) { |
| 320 | switch (format) { | 367 | switch (format) { |
| 321 | case PixelFormat::B5G6R5_UNORM: | 368 | case PixelFormat::B5G6R5_UNORM: |
| @@ -359,7 +406,8 @@ ImageBufferMap::~ImageBufferMap() { | |||
| 359 | 406 | ||
| 360 | TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, | 407 | TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, |
| 361 | StateTracker& state_tracker_) | 408 | StateTracker& state_tracker_) |
| 362 | : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) { | 409 | : device{device_}, state_tracker{state_tracker_}, |
| 410 | util_shaders(program_manager), resolution{Settings::values.resolution_info} { | ||
| 363 | static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; | 411 | static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; |
| 364 | for (size_t i = 0; i < TARGETS.size(); ++i) { | 412 | for (size_t i = 0; i < TARGETS.size(); ++i) { |
| 365 | const GLenum target = TARGETS[i]; | 413 | const GLenum target = TARGETS[i]; |
| @@ -426,6 +474,13 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
| 426 | set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); | 474 | set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); |
| 427 | set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); | 475 | set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); |
| 428 | set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); | 476 | set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); |
| 477 | |||
| 478 | if (resolution.active) { | ||
| 479 | for (size_t i = 0; i < rescale_draw_fbos.size(); ++i) { | ||
| 480 | rescale_draw_fbos[i].Create(); | ||
| 481 | rescale_read_fbos[i].Create(); | ||
| 482 | } | ||
| 483 | } | ||
| 429 | } | 484 | } |
| 430 | 485 | ||
| 431 | TextureCacheRuntime::~TextureCacheRuntime() = default; | 486 | TextureCacheRuntime::~TextureCacheRuntime() = default; |
| @@ -442,6 +497,15 @@ ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { | |||
| 442 | return download_buffers.RequestMap(size, false); | 497 | return download_buffers.RequestMap(size, false); |
| 443 | } | 498 | } |
| 444 | 499 | ||
| 500 | u64 TextureCacheRuntime::GetDeviceLocalMemory() const { | ||
| 501 | if (GLAD_GL_NVX_gpu_memory_info) { | ||
| 502 | GLint cur_avail_mem_kb = 0; | ||
| 503 | glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb); | ||
| 504 | return static_cast<u64>(cur_avail_mem_kb) * 1_KiB; | ||
| 505 | } | ||
| 506 | return 2_GiB; // Return minimum requirements | ||
| 507 | } | ||
| 508 | |||
| 445 | void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, | 509 | void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, |
| 446 | std::span<const ImageCopy> copies) { | 510 | std::span<const ImageCopy> copies) { |
| 447 | const GLuint dst_name = dst_image.Handle(); | 511 | const GLuint dst_name = dst_image.Handle(); |
| @@ -605,13 +669,13 @@ std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req | |||
| 605 | return found; | 669 | return found; |
| 606 | } | 670 | } |
| 607 | 671 | ||
| 608 | Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, | 672 | Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, |
| 609 | VAddr cpu_addr_) | 673 | VAddr cpu_addr_) |
| 610 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) { | 674 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { |
| 611 | if (CanBeAccelerated(runtime, info)) { | 675 | if (CanBeAccelerated(*runtime, info)) { |
| 612 | flags |= ImageFlagBits::AcceleratedUpload; | 676 | flags |= ImageFlagBits::AcceleratedUpload; |
| 613 | } | 677 | } |
| 614 | if (IsConverted(runtime.device, info.format, info.type)) { | 678 | if (IsConverted(runtime->device, info.format, info.type)) { |
| 615 | flags |= ImageFlagBits::Converted; | 679 | flags |= ImageFlagBits::Converted; |
| 616 | gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; | 680 | gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; |
| 617 | gl_format = GL_RGBA; | 681 | gl_format = GL_RGBA; |
| @@ -622,58 +686,25 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, | |||
| 622 | gl_format = tuple.format; | 686 | gl_format = tuple.format; |
| 623 | gl_type = tuple.type; | 687 | gl_type = tuple.type; |
| 624 | } | 688 | } |
| 625 | const GLenum target = ImageTarget(info); | 689 | texture = MakeImage(info, gl_internal_format); |
| 626 | const GLsizei width = info.size.width; | 690 | current_texture = texture.handle; |
| 627 | const GLsizei height = info.size.height; | 691 | if (runtime->device.HasDebuggingToolAttached()) { |
| 628 | const GLsizei depth = info.size.depth; | ||
| 629 | const int max_host_mip_levels = std::bit_width(info.size.width); | ||
| 630 | const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); | ||
| 631 | const GLsizei num_layers = info.resources.layers; | ||
| 632 | const GLsizei num_samples = info.num_samples; | ||
| 633 | |||
| 634 | GLuint handle = 0; | ||
| 635 | if (target != GL_TEXTURE_BUFFER) { | ||
| 636 | texture.Create(target); | ||
| 637 | handle = texture.handle; | ||
| 638 | } | ||
| 639 | switch (target) { | ||
| 640 | case GL_TEXTURE_1D_ARRAY: | ||
| 641 | glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); | ||
| 642 | break; | ||
| 643 | case GL_TEXTURE_2D_ARRAY: | ||
| 644 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); | ||
| 645 | break; | ||
| 646 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { | ||
| 647 | // TODO: Where should 'fixedsamplelocations' come from? | ||
| 648 | const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); | ||
| 649 | glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x, | ||
| 650 | height >> samples_y, num_layers, GL_FALSE); | ||
| 651 | break; | ||
| 652 | } | ||
| 653 | case GL_TEXTURE_RECTANGLE: | ||
| 654 | glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); | ||
| 655 | break; | ||
| 656 | case GL_TEXTURE_3D: | ||
| 657 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); | ||
| 658 | break; | ||
| 659 | case GL_TEXTURE_BUFFER: | ||
| 660 | UNREACHABLE(); | ||
| 661 | break; | ||
| 662 | default: | ||
| 663 | UNREACHABLE_MSG("Invalid target=0x{:x}", target); | ||
| 664 | break; | ||
| 665 | } | ||
| 666 | if (runtime.device.HasDebuggingToolAttached()) { | ||
| 667 | const std::string name = VideoCommon::Name(*this); | 692 | const std::string name = VideoCommon::Name(*this); |
| 668 | glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle, | 693 | glObjectLabel(ImageTarget(info) == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, |
| 669 | static_cast<GLsizei>(name.size()), name.data()); | 694 | texture.handle, static_cast<GLsizei>(name.size()), name.data()); |
| 670 | } | 695 | } |
| 671 | } | 696 | } |
| 672 | 697 | ||
| 698 | Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {} | ||
| 699 | |||
| 673 | Image::~Image() = default; | 700 | Image::~Image() = default; |
| 674 | 701 | ||
| 675 | void Image::UploadMemory(const ImageBufferMap& map, | 702 | void Image::UploadMemory(const ImageBufferMap& map, |
| 676 | std::span<const VideoCommon::BufferImageCopy> copies) { | 703 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 704 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); | ||
| 705 | if (is_rescaled) { | ||
| 706 | ScaleDown(true); | ||
| 707 | } | ||
| 677 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); | 708 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); |
| 678 | glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); | 709 | glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); |
| 679 | 710 | ||
| @@ -693,12 +724,18 @@ void Image::UploadMemory(const ImageBufferMap& map, | |||
| 693 | } | 724 | } |
| 694 | CopyBufferToImage(copy, map.offset); | 725 | CopyBufferToImage(copy, map.offset); |
| 695 | } | 726 | } |
| 727 | if (is_rescaled) { | ||
| 728 | ScaleUp(); | ||
| 729 | } | ||
| 696 | } | 730 | } |
| 697 | 731 | ||
| 698 | void Image::DownloadMemory(ImageBufferMap& map, | 732 | void Image::DownloadMemory(ImageBufferMap& map, |
| 699 | std::span<const VideoCommon::BufferImageCopy> copies) { | 733 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 734 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); | ||
| 735 | if (is_rescaled) { | ||
| 736 | ScaleDown(); | ||
| 737 | } | ||
| 700 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API | 738 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API |
| 701 | |||
| 702 | glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); | 739 | glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); |
| 703 | glPixelStorei(GL_PACK_ALIGNMENT, 1); | 740 | glPixelStorei(GL_PACK_ALIGNMENT, 1); |
| 704 | 741 | ||
| @@ -716,6 +753,9 @@ void Image::DownloadMemory(ImageBufferMap& map, | |||
| 716 | } | 753 | } |
| 717 | CopyImageToBuffer(copy, map.offset); | 754 | CopyImageToBuffer(copy, map.offset); |
| 718 | } | 755 | } |
| 756 | if (is_rescaled) { | ||
| 757 | ScaleUp(true); | ||
| 758 | } | ||
| 719 | } | 759 | } |
| 720 | 760 | ||
| 721 | GLuint Image::StorageHandle() noexcept { | 761 | GLuint Image::StorageHandle() noexcept { |
| @@ -741,11 +781,11 @@ GLuint Image::StorageHandle() noexcept { | |||
| 741 | return store_view.handle; | 781 | return store_view.handle; |
| 742 | } | 782 | } |
| 743 | store_view.Create(); | 783 | store_view.Create(); |
| 744 | glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0, | 784 | glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0, |
| 745 | info.resources.levels, 0, info.resources.layers); | 785 | info.resources.levels, 0, info.resources.layers); |
| 746 | return store_view.handle; | 786 | return store_view.handle; |
| 747 | default: | 787 | default: |
| 748 | return texture.handle; | 788 | return current_texture; |
| 749 | } | 789 | } |
| 750 | } | 790 | } |
| 751 | 791 | ||
| @@ -849,6 +889,140 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b | |||
| 849 | } | 889 | } |
| 850 | } | 890 | } |
| 851 | 891 | ||
| 892 | void Image::Scale(bool up_scale) { | ||
| 893 | const auto format_type = GetFormatType(info.format); | ||
| 894 | const GLenum attachment = [format_type] { | ||
| 895 | switch (format_type) { | ||
| 896 | case SurfaceType::ColorTexture: | ||
| 897 | return GL_COLOR_ATTACHMENT0; | ||
| 898 | case SurfaceType::Depth: | ||
| 899 | return GL_DEPTH_ATTACHMENT; | ||
| 900 | case SurfaceType::DepthStencil: | ||
| 901 | return GL_DEPTH_STENCIL_ATTACHMENT; | ||
| 902 | default: | ||
| 903 | UNREACHABLE(); | ||
| 904 | return GL_COLOR_ATTACHMENT0; | ||
| 905 | } | ||
| 906 | }(); | ||
| 907 | const GLenum mask = [format_type] { | ||
| 908 | switch (format_type) { | ||
| 909 | case SurfaceType::ColorTexture: | ||
| 910 | return GL_COLOR_BUFFER_BIT; | ||
| 911 | case SurfaceType::Depth: | ||
| 912 | return GL_DEPTH_BUFFER_BIT; | ||
| 913 | case SurfaceType::DepthStencil: | ||
| 914 | return GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT; | ||
| 915 | default: | ||
| 916 | UNREACHABLE(); | ||
| 917 | return GL_COLOR_BUFFER_BIT; | ||
| 918 | } | ||
| 919 | }(); | ||
| 920 | const size_t fbo_index = [format_type] { | ||
| 921 | switch (format_type) { | ||
| 922 | case SurfaceType::ColorTexture: | ||
| 923 | return 0; | ||
| 924 | case SurfaceType::Depth: | ||
| 925 | return 1; | ||
| 926 | case SurfaceType::DepthStencil: | ||
| 927 | return 2; | ||
| 928 | default: | ||
| 929 | UNREACHABLE(); | ||
| 930 | return 0; | ||
| 931 | } | ||
| 932 | }(); | ||
| 933 | const bool is_2d = info.type == ImageType::e2D; | ||
| 934 | const bool is_color{(mask & GL_COLOR_BUFFER_BIT) != 0}; | ||
| 935 | // Integer formats must use NEAREST filter | ||
| 936 | const bool linear_color_format{is_color && !IsPixelFormatInteger(info.format)}; | ||
| 937 | const GLenum filter = linear_color_format ? GL_LINEAR : GL_NEAREST; | ||
| 938 | |||
| 939 | const auto& resolution = runtime->resolution; | ||
| 940 | const u32 scaled_width = resolution.ScaleUp(info.size.width); | ||
| 941 | const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; | ||
| 942 | const u32 original_width = info.size.width; | ||
| 943 | const u32 original_height = info.size.height; | ||
| 944 | |||
| 945 | if (!upscaled_backup.handle) { | ||
| 946 | auto dst_info = info; | ||
| 947 | dst_info.size.width = scaled_width; | ||
| 948 | dst_info.size.height = scaled_height; | ||
| 949 | upscaled_backup = MakeImage(dst_info, gl_internal_format); | ||
| 950 | } | ||
| 951 | const u32 src_width = up_scale ? original_width : scaled_width; | ||
| 952 | const u32 src_height = up_scale ? original_height : scaled_height; | ||
| 953 | const u32 dst_width = up_scale ? scaled_width : original_width; | ||
| 954 | const u32 dst_height = up_scale ? scaled_height : original_height; | ||
| 955 | const auto src_handle = up_scale ? texture.handle : upscaled_backup.handle; | ||
| 956 | const auto dst_handle = up_scale ? upscaled_backup.handle : texture.handle; | ||
| 957 | |||
| 958 | // TODO (ameerj): Investigate other GL states that affect blitting. | ||
| 959 | glDisablei(GL_SCISSOR_TEST, 0); | ||
| 960 | glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(dst_width), | ||
| 961 | static_cast<GLfloat>(dst_height)); | ||
| 962 | |||
| 963 | const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle; | ||
| 964 | const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; | ||
| 965 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | ||
| 966 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 967 | const u32 src_level_width = std::max(1u, src_width >> level); | ||
| 968 | const u32 src_level_height = std::max(1u, src_height >> level); | ||
| 969 | const u32 dst_level_width = std::max(1u, dst_width >> level); | ||
| 970 | const u32 dst_level_height = std::max(1u, dst_height >> level); | ||
| 971 | |||
| 972 | glNamedFramebufferTextureLayer(read_fbo, attachment, src_handle, level, layer); | ||
| 973 | glNamedFramebufferTextureLayer(draw_fbo, attachment, dst_handle, level, layer); | ||
| 974 | |||
| 975 | glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, | ||
| 976 | 0, dst_level_width, dst_level_height, mask, filter); | ||
| 977 | } | ||
| 978 | } | ||
| 979 | current_texture = dst_handle; | ||
| 980 | auto& state_tracker = runtime->GetStateTracker(); | ||
| 981 | state_tracker.NotifyViewport0(); | ||
| 982 | state_tracker.NotifyScissor0(); | ||
| 983 | } | ||
| 984 | |||
| 985 | bool Image::ScaleUp(bool ignore) { | ||
| 986 | if (True(flags & ImageFlagBits::Rescaled)) { | ||
| 987 | return false; | ||
| 988 | } | ||
| 989 | if (gl_format == 0 && gl_type == 0) { | ||
| 990 | // compressed textures | ||
| 991 | return false; | ||
| 992 | } | ||
| 993 | if (info.type == ImageType::Linear) { | ||
| 994 | UNREACHABLE(); | ||
| 995 | return false; | ||
| 996 | } | ||
| 997 | flags |= ImageFlagBits::Rescaled; | ||
| 998 | if (!runtime->resolution.active) { | ||
| 999 | return false; | ||
| 1000 | } | ||
| 1001 | has_scaled = true; | ||
| 1002 | if (ignore) { | ||
| 1003 | current_texture = upscaled_backup.handle; | ||
| 1004 | return true; | ||
| 1005 | } | ||
| 1006 | Scale(true); | ||
| 1007 | return true; | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | bool Image::ScaleDown(bool ignore) { | ||
| 1011 | if (False(flags & ImageFlagBits::Rescaled)) { | ||
| 1012 | return false; | ||
| 1013 | } | ||
| 1014 | flags &= ~ImageFlagBits::Rescaled; | ||
| 1015 | if (!runtime->resolution.active) { | ||
| 1016 | return false; | ||
| 1017 | } | ||
| 1018 | if (ignore) { | ||
| 1019 | current_texture = texture.handle; | ||
| 1020 | return true; | ||
| 1021 | } | ||
| 1022 | Scale(false); | ||
| 1023 | return true; | ||
| 1024 | } | ||
| 1025 | |||
| 852 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, | 1026 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, |
| 853 | ImageId image_id_, Image& image) | 1027 | ImageId image_id_, Image& image) |
| 854 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { | 1028 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { |
| @@ -862,7 +1036,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 862 | flat_range = info.range; | 1036 | flat_range = info.range; |
| 863 | set_object_label = device.HasDebuggingToolAttached(); | 1037 | set_object_label = device.HasDebuggingToolAttached(); |
| 864 | is_render_target = info.IsRenderTarget(); | 1038 | is_render_target = info.IsRenderTarget(); |
| 865 | original_texture = image.texture.handle; | 1039 | original_texture = image.Handle(); |
| 866 | num_samples = image.info.num_samples; | 1040 | num_samples = image.info.num_samples; |
| 867 | if (!is_render_target) { | 1041 | if (!is_render_target) { |
| 868 | swizzle[0] = info.x_source; | 1042 | swizzle[0] = info.x_source; |
| @@ -950,9 +1124,11 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | |||
| 950 | const VideoCommon::ImageViewInfo& view_info) | 1124 | const VideoCommon::ImageViewInfo& view_info) |
| 951 | : VideoCommon::ImageViewBase{info, view_info} {} | 1125 | : VideoCommon::ImageViewBase{info, view_info} {} |
| 952 | 1126 | ||
| 953 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) | 1127 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) |
| 954 | : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} | 1128 | : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} |
| 955 | 1129 | ||
| 1130 | ImageView::~ImageView() = default; | ||
| 1131 | |||
| 956 | GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { | 1132 | GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { |
| 957 | if (image_format == Shader::ImageFormat::Typeless) { | 1133 | if (image_format == Shader::ImageFormat::Typeless) { |
| 958 | return Handle(texture_type); | 1134 | return Handle(texture_type); |
| @@ -1037,7 +1213,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { | |||
| 1037 | glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); | 1213 | glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); |
| 1038 | 1214 | ||
| 1039 | if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { | 1215 | if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { |
| 1040 | glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); | 1216 | const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); |
| 1217 | glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy); | ||
| 1041 | } else { | 1218 | } else { |
| 1042 | LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); | 1219 | LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); |
| 1043 | } | 1220 | } |
| @@ -1056,13 +1233,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { | |||
| 1056 | 1233 | ||
| 1057 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, | 1234 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, |
| 1058 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { | 1235 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { |
| 1059 | // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of | 1236 | framebuffer.Create(); |
| 1060 | // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared | 1237 | GLuint handle = framebuffer.handle; |
| 1061 | // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with | ||
| 1062 | // mismatching size, this is why core framebuffers are preferred. | ||
| 1063 | GLuint handle; | ||
| 1064 | glGenFramebuffers(1, &handle); | ||
| 1065 | glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); | ||
| 1066 | 1238 | ||
| 1067 | GLsizei num_buffers = 0; | 1239 | GLsizei num_buffers = 0; |
| 1068 | std::array<GLenum, NUM_RT> gl_draw_buffers; | 1240 | std::array<GLenum, NUM_RT> gl_draw_buffers; |
| @@ -1110,31 +1282,31 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1110 | const std::string name = VideoCommon::Name(key); | 1282 | const std::string name = VideoCommon::Name(key); |
| 1111 | glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data()); | 1283 | glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data()); |
| 1112 | } | 1284 | } |
| 1113 | framebuffer.handle = handle; | ||
| 1114 | } | 1285 | } |
| 1115 | 1286 | ||
| 1287 | Framebuffer::~Framebuffer() = default; | ||
| 1288 | |||
| 1116 | void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image, | 1289 | void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image, |
| 1117 | std::span<const VideoCommon::ImageCopy> copies) { | 1290 | std::span<const VideoCommon::ImageCopy> copies) { |
| 1118 | static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; | 1291 | static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; |
| 1119 | const u32 requested_pbo_size = | 1292 | const u32 img_bpp = BytesPerBlock(src_image.info.format); |
| 1120 | std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes); | ||
| 1121 | |||
| 1122 | if (bgr_pbo_size < requested_pbo_size) { | ||
| 1123 | bgr_pbo.Create(); | ||
| 1124 | bgr_pbo_size = requested_pbo_size; | ||
| 1125 | glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); | ||
| 1126 | } | ||
| 1127 | for (const ImageCopy& copy : copies) { | 1293 | for (const ImageCopy& copy : copies) { |
| 1128 | ASSERT(copy.src_offset == zero_offset); | 1294 | ASSERT(copy.src_offset == zero_offset); |
| 1129 | ASSERT(copy.dst_offset == zero_offset); | 1295 | ASSERT(copy.dst_offset == zero_offset); |
| 1130 | 1296 | const u32 num_src_layers = static_cast<u32>(copy.src_subresource.num_layers); | |
| 1297 | const u32 copy_size = copy.extent.width * copy.extent.height * num_src_layers * img_bpp; | ||
| 1298 | if (bgr_pbo_size < copy_size) { | ||
| 1299 | bgr_pbo.Create(); | ||
| 1300 | bgr_pbo_size = copy_size; | ||
| 1301 | glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); | ||
| 1302 | } | ||
| 1131 | // Copy from source to PBO | 1303 | // Copy from source to PBO |
| 1132 | glPixelStorei(GL_PACK_ALIGNMENT, 1); | 1304 | glPixelStorei(GL_PACK_ALIGNMENT, 1); |
| 1133 | glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); | 1305 | glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); |
| 1134 | glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle); | 1306 | glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle); |
| 1135 | glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, | 1307 | glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, |
| 1136 | copy.src_subresource.num_layers, src_image.GlFormat(), | 1308 | num_src_layers, src_image.GlFormat(), src_image.GlType(), |
| 1137 | src_image.GlType(), static_cast<GLsizei>(bgr_pbo_size), nullptr); | 1309 | static_cast<GLsizei>(bgr_pbo_size), nullptr); |
| 1138 | 1310 | ||
| 1139 | // Copy from PBO to destination in desired GL format | 1311 | // Copy from PBO to destination in desired GL format |
| 1140 | glPixelStorei(GL_UNPACK_ALIGNMENT, 1); | 1312 | glPixelStorei(GL_UNPACK_ALIGNMENT, 1); |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1ca2c90be..1bb762568 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -15,6 +15,10 @@ | |||
| 15 | #include "video_core/texture_cache/image_view_base.h" | 15 | #include "video_core/texture_cache/image_view_base.h" |
| 16 | #include "video_core/texture_cache/texture_cache_base.h" | 16 | #include "video_core/texture_cache/texture_cache_base.h" |
| 17 | 17 | ||
| 18 | namespace Settings { | ||
| 19 | struct ResolutionScalingInfo; | ||
| 20 | } | ||
| 21 | |||
| 18 | namespace OpenGL { | 22 | namespace OpenGL { |
| 19 | 23 | ||
| 20 | class Device; | 24 | class Device; |
| @@ -78,9 +82,11 @@ public: | |||
| 78 | 82 | ||
| 79 | ImageBufferMap DownloadStagingBuffer(size_t size); | 83 | ImageBufferMap DownloadStagingBuffer(size_t size); |
| 80 | 84 | ||
| 85 | u64 GetDeviceLocalMemory() const; | ||
| 86 | |||
| 81 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | 87 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); |
| 82 | 88 | ||
| 83 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { | 89 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { |
| 84 | UNIMPLEMENTED(); | 90 | UNIMPLEMENTED(); |
| 85 | } | 91 | } |
| 86 | 92 | ||
| @@ -110,6 +116,12 @@ public: | |||
| 110 | 116 | ||
| 111 | bool HasNativeASTC() const noexcept; | 117 | bool HasNativeASTC() const noexcept; |
| 112 | 118 | ||
| 119 | void TickFrame() {} | ||
| 120 | |||
| 121 | StateTracker& GetStateTracker() { | ||
| 122 | return state_tracker; | ||
| 123 | } | ||
| 124 | |||
| 113 | private: | 125 | private: |
| 114 | struct StagingBuffers { | 126 | struct StagingBuffers { |
| 115 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); | 127 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); |
| @@ -149,6 +161,10 @@ private: | |||
| 149 | OGLTextureView null_image_view_cube; | 161 | OGLTextureView null_image_view_cube; |
| 150 | 162 | ||
| 151 | std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; | 163 | std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; |
| 164 | |||
| 165 | std::array<OGLFramebuffer, 3> rescale_draw_fbos; | ||
| 166 | std::array<OGLFramebuffer, 3> rescale_read_fbos; | ||
| 167 | const Settings::ResolutionScalingInfo& resolution; | ||
| 152 | }; | 168 | }; |
| 153 | 169 | ||
| 154 | class Image : public VideoCommon::ImageBase { | 170 | class Image : public VideoCommon::ImageBase { |
| @@ -157,6 +173,7 @@ class Image : public VideoCommon::ImageBase { | |||
| 157 | public: | 173 | public: |
| 158 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | 174 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, |
| 159 | VAddr cpu_addr); | 175 | VAddr cpu_addr); |
| 176 | explicit Image(const VideoCommon::NullImageParams&); | ||
| 160 | 177 | ||
| 161 | ~Image(); | 178 | ~Image(); |
| 162 | 179 | ||
| @@ -174,7 +191,7 @@ public: | |||
| 174 | GLuint StorageHandle() noexcept; | 191 | GLuint StorageHandle() noexcept; |
| 175 | 192 | ||
| 176 | GLuint Handle() const noexcept { | 193 | GLuint Handle() const noexcept { |
| 177 | return texture.handle; | 194 | return current_texture; |
| 178 | } | 195 | } |
| 179 | 196 | ||
| 180 | GLuint GlFormat() const noexcept { | 197 | GLuint GlFormat() const noexcept { |
| @@ -185,16 +202,25 @@ public: | |||
| 185 | return gl_type; | 202 | return gl_type; |
| 186 | } | 203 | } |
| 187 | 204 | ||
| 205 | bool ScaleUp(bool ignore = false); | ||
| 206 | |||
| 207 | bool ScaleDown(bool ignore = false); | ||
| 208 | |||
| 188 | private: | 209 | private: |
| 189 | void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); | 210 | void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); |
| 190 | 211 | ||
| 191 | void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); | 212 | void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); |
| 192 | 213 | ||
| 214 | void Scale(bool up_scale); | ||
| 215 | |||
| 193 | OGLTexture texture; | 216 | OGLTexture texture; |
| 217 | OGLTexture upscaled_backup; | ||
| 194 | OGLTextureView store_view; | 218 | OGLTextureView store_view; |
| 195 | GLenum gl_internal_format = GL_NONE; | 219 | GLenum gl_internal_format = GL_NONE; |
| 196 | GLenum gl_format = GL_NONE; | 220 | GLenum gl_format = GL_NONE; |
| 197 | GLenum gl_type = GL_NONE; | 221 | GLenum gl_type = GL_NONE; |
| 222 | TextureCacheRuntime* runtime{}; | ||
| 223 | GLuint current_texture{}; | ||
| 198 | }; | 224 | }; |
| 199 | 225 | ||
| 200 | class ImageView : public VideoCommon::ImageViewBase { | 226 | class ImageView : public VideoCommon::ImageViewBase { |
| @@ -206,7 +232,15 @@ public: | |||
| 206 | const VideoCommon::ImageViewInfo&, GPUVAddr); | 232 | const VideoCommon::ImageViewInfo&, GPUVAddr); |
| 207 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | 233 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, |
| 208 | const VideoCommon::ImageViewInfo& view_info); | 234 | const VideoCommon::ImageViewInfo& view_info); |
| 209 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); | 235 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&); |
| 236 | |||
| 237 | ~ImageView(); | ||
| 238 | |||
| 239 | ImageView(const ImageView&) = delete; | ||
| 240 | ImageView& operator=(const ImageView&) = delete; | ||
| 241 | |||
| 242 | ImageView(ImageView&&) = default; | ||
| 243 | ImageView& operator=(ImageView&&) = default; | ||
| 210 | 244 | ||
| 211 | [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, | 245 | [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, |
| 212 | Shader::ImageFormat image_format); | 246 | Shader::ImageFormat image_format); |
| @@ -276,6 +310,14 @@ public: | |||
| 276 | explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, | 310 | explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, |
| 277 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key); | 311 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key); |
| 278 | 312 | ||
| 313 | ~Framebuffer(); | ||
| 314 | |||
| 315 | Framebuffer(const Framebuffer&) = delete; | ||
| 316 | Framebuffer& operator=(const Framebuffer&) = delete; | ||
| 317 | |||
| 318 | Framebuffer(Framebuffer&&) = default; | ||
| 319 | Framebuffer& operator=(Framebuffer&&) = default; | ||
| 320 | |||
| 279 | [[nodiscard]] GLuint Handle() const noexcept { | 321 | [[nodiscard]] GLuint Handle() const noexcept { |
| 280 | return framebuffer.handle; | 322 | return framebuffer.handle; |
| 281 | } | 323 | } |
| @@ -293,7 +335,7 @@ struct TextureCacheParams { | |||
| 293 | static constexpr bool ENABLE_VALIDATION = true; | 335 | static constexpr bool ENABLE_VALIDATION = true; |
| 294 | static constexpr bool FRAMEBUFFER_BLITS = true; | 336 | static constexpr bool FRAMEBUFFER_BLITS = true; |
| 295 | static constexpr bool HAS_EMULATED_COPIES = true; | 337 | static constexpr bool HAS_EMULATED_COPIES = true; |
| 296 | static constexpr bool HAS_DEVICE_MEMORY_INFO = false; | 338 | static constexpr bool HAS_DEVICE_MEMORY_INFO = true; |
| 297 | 339 | ||
| 298 | using Runtime = OpenGL::TextureCacheRuntime; | 340 | using Runtime = OpenGL::TextureCacheRuntime; |
| 299 | using Image = OpenGL::Image; | 341 | using Image = OpenGL::Image; |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7d7cba69c..28daacd82 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -21,8 +21,13 @@ | |||
| 21 | #include "core/memory.h" | 21 | #include "core/memory.h" |
| 22 | #include "core/perf_stats.h" | 22 | #include "core/perf_stats.h" |
| 23 | #include "core/telemetry_session.h" | 23 | #include "core/telemetry_session.h" |
| 24 | #include "video_core/host_shaders/fxaa_frag.h" | ||
| 25 | #include "video_core/host_shaders/fxaa_vert.h" | ||
| 24 | #include "video_core/host_shaders/opengl_present_frag.h" | 26 | #include "video_core/host_shaders/opengl_present_frag.h" |
| 27 | #include "video_core/host_shaders/opengl_present_scaleforce_frag.h" | ||
| 25 | #include "video_core/host_shaders/opengl_present_vert.h" | 28 | #include "video_core/host_shaders/opengl_present_vert.h" |
| 29 | #include "video_core/host_shaders/present_bicubic_frag.h" | ||
| 30 | #include "video_core/host_shaders/present_gaussian_frag.h" | ||
| 26 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 31 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 27 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 28 | #include "video_core/renderer_opengl/gl_shader_util.h" | 33 | #include "video_core/renderer_opengl/gl_shader_util.h" |
| @@ -208,7 +213,9 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf | |||
| 208 | framebuffer_crop_rect = framebuffer.crop_rect; | 213 | framebuffer_crop_rect = framebuffer.crop_rect; |
| 209 | 214 | ||
| 210 | const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; | 215 | const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; |
| 211 | if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { | 216 | screen_info.was_accelerated = |
| 217 | rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride); | ||
| 218 | if (screen_info.was_accelerated) { | ||
| 212 | return; | 219 | return; |
| 213 | } | 220 | } |
| 214 | 221 | ||
| @@ -251,12 +258,25 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color | |||
| 251 | 258 | ||
| 252 | void RendererOpenGL::InitOpenGLObjects() { | 259 | void RendererOpenGL::InitOpenGLObjects() { |
| 253 | // Create shader programs | 260 | // Create shader programs |
| 261 | fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); | ||
| 262 | fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); | ||
| 254 | present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); | 263 | present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); |
| 255 | present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); | 264 | present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); |
| 265 | present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); | ||
| 266 | present_gaussian_fragment = | ||
| 267 | CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); | ||
| 268 | present_scaleforce_fragment = | ||
| 269 | CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), | ||
| 270 | GL_FRAGMENT_SHADER); | ||
| 256 | 271 | ||
| 257 | // Generate presentation sampler | 272 | // Generate presentation sampler |
| 258 | present_sampler.Create(); | 273 | present_sampler.Create(); |
| 259 | glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | 274 | glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); |
| 275 | glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | ||
| 276 | |||
| 277 | present_sampler_nn.Create(); | ||
| 278 | glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | ||
| 279 | glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | ||
| 260 | 280 | ||
| 261 | // Generate VBO handle for drawing | 281 | // Generate VBO handle for drawing |
| 262 | vertex_buffer.Create(); | 282 | vertex_buffer.Create(); |
| @@ -274,6 +294,8 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 274 | 294 | ||
| 275 | // Clear screen to black | 295 | // Clear screen to black |
| 276 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); | 296 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); |
| 297 | |||
| 298 | fxaa_framebuffer.Create(); | ||
| 277 | } | 299 | } |
| 278 | 300 | ||
| 279 | void RendererOpenGL::AddTelemetryFields() { | 301 | void RendererOpenGL::AddTelemetryFields() { |
| @@ -325,18 +347,130 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | |||
| 325 | texture.resource.Release(); | 347 | texture.resource.Release(); |
| 326 | texture.resource.Create(GL_TEXTURE_2D); | 348 | texture.resource.Create(GL_TEXTURE_2D); |
| 327 | glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); | 349 | glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); |
| 350 | fxaa_texture.Release(); | ||
| 351 | fxaa_texture.Create(GL_TEXTURE_2D); | ||
| 352 | glTextureStorage2D(fxaa_texture.handle, 1, GL_RGBA16F, | ||
| 353 | Settings::values.resolution_info.ScaleUp(screen_info.texture.width), | ||
| 354 | Settings::values.resolution_info.ScaleUp(screen_info.texture.height)); | ||
| 355 | glNamedFramebufferTexture(fxaa_framebuffer.handle, GL_COLOR_ATTACHMENT0, fxaa_texture.handle, | ||
| 356 | 0); | ||
| 328 | } | 357 | } |
| 329 | 358 | ||
| 330 | void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | 359 | void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { |
| 360 | // TODO: Signal state tracker about these changes | ||
| 361 | state_tracker.NotifyScreenDrawVertexArray(); | ||
| 362 | state_tracker.NotifyPolygonModes(); | ||
| 363 | state_tracker.NotifyViewport0(); | ||
| 364 | state_tracker.NotifyScissor0(); | ||
| 365 | state_tracker.NotifyColorMask(0); | ||
| 366 | state_tracker.NotifyBlend0(); | ||
| 367 | state_tracker.NotifyFramebuffer(); | ||
| 368 | state_tracker.NotifyFrontFace(); | ||
| 369 | state_tracker.NotifyCullTest(); | ||
| 370 | state_tracker.NotifyDepthTest(); | ||
| 371 | state_tracker.NotifyStencilTest(); | ||
| 372 | state_tracker.NotifyPolygonOffset(); | ||
| 373 | state_tracker.NotifyRasterizeEnable(); | ||
| 374 | state_tracker.NotifyFramebufferSRGB(); | ||
| 375 | state_tracker.NotifyLogicOp(); | ||
| 376 | state_tracker.NotifyClipControl(); | ||
| 377 | state_tracker.NotifyAlphaTest(); | ||
| 378 | |||
| 379 | state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | ||
| 380 | |||
| 331 | // Update background color before drawing | 381 | // Update background color before drawing |
| 332 | glClearColor(Settings::values.bg_red.GetValue() / 255.0f, | 382 | glClearColor(Settings::values.bg_red.GetValue() / 255.0f, |
| 333 | Settings::values.bg_green.GetValue() / 255.0f, | 383 | Settings::values.bg_green.GetValue() / 255.0f, |
| 334 | Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); | 384 | Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); |
| 335 | 385 | ||
| 386 | glEnable(GL_CULL_FACE); | ||
| 387 | glDisable(GL_COLOR_LOGIC_OP); | ||
| 388 | glDisable(GL_DEPTH_TEST); | ||
| 389 | glDisable(GL_STENCIL_TEST); | ||
| 390 | glDisable(GL_POLYGON_OFFSET_FILL); | ||
| 391 | glDisable(GL_RASTERIZER_DISCARD); | ||
| 392 | glDisable(GL_ALPHA_TEST); | ||
| 393 | glDisablei(GL_BLEND, 0); | ||
| 394 | glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); | ||
| 395 | glCullFace(GL_BACK); | ||
| 396 | glFrontFace(GL_CW); | ||
| 397 | glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); | ||
| 398 | |||
| 399 | glBindTextureUnit(0, screen_info.display_texture); | ||
| 400 | |||
| 401 | if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa) { | ||
| 402 | program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle); | ||
| 403 | |||
| 404 | glEnablei(GL_SCISSOR_TEST, 0); | ||
| 405 | auto viewport_width = screen_info.texture.width; | ||
| 406 | auto scissor_width = framebuffer_crop_rect.GetWidth(); | ||
| 407 | if (scissor_width <= 0) { | ||
| 408 | scissor_width = viewport_width; | ||
| 409 | } | ||
| 410 | auto viewport_height = screen_info.texture.height; | ||
| 411 | auto scissor_height = framebuffer_crop_rect.GetHeight(); | ||
| 412 | if (scissor_height <= 0) { | ||
| 413 | scissor_height = viewport_height; | ||
| 414 | } | ||
| 415 | if (screen_info.was_accelerated) { | ||
| 416 | viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width); | ||
| 417 | scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width); | ||
| 418 | viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height); | ||
| 419 | scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height); | ||
| 420 | } | ||
| 421 | glScissorIndexed(0, 0, 0, scissor_width, scissor_height); | ||
| 422 | glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(viewport_width), | ||
| 423 | static_cast<GLfloat>(viewport_height)); | ||
| 424 | glDepthRangeIndexed(0, 0.0, 0.0); | ||
| 425 | |||
| 426 | glBindSampler(0, present_sampler.handle); | ||
| 427 | GLint old_read_fb; | ||
| 428 | GLint old_draw_fb; | ||
| 429 | glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); | ||
| 430 | glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); | ||
| 431 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fxaa_framebuffer.handle); | ||
| 432 | |||
| 433 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | ||
| 434 | |||
| 435 | glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); | ||
| 436 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); | ||
| 437 | |||
| 438 | glBindTextureUnit(0, fxaa_texture.handle); | ||
| 439 | } | ||
| 440 | |||
| 336 | // Set projection matrix | 441 | // Set projection matrix |
| 337 | const std::array ortho_matrix = | 442 | const std::array ortho_matrix = |
| 338 | MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); | 443 | MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); |
| 339 | program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle); | 444 | |
| 445 | GLuint fragment_handle; | ||
| 446 | const auto filter = Settings::values.scaling_filter.GetValue(); | ||
| 447 | switch (filter) { | ||
| 448 | case Settings::ScalingFilter::NearestNeighbor: | ||
| 449 | fragment_handle = present_bilinear_fragment.handle; | ||
| 450 | break; | ||
| 451 | case Settings::ScalingFilter::Bilinear: | ||
| 452 | fragment_handle = present_bilinear_fragment.handle; | ||
| 453 | break; | ||
| 454 | case Settings::ScalingFilter::Bicubic: | ||
| 455 | fragment_handle = present_bicubic_fragment.handle; | ||
| 456 | break; | ||
| 457 | case Settings::ScalingFilter::Gaussian: | ||
| 458 | fragment_handle = present_gaussian_fragment.handle; | ||
| 459 | break; | ||
| 460 | case Settings::ScalingFilter::ScaleForce: | ||
| 461 | fragment_handle = present_scaleforce_fragment.handle; | ||
| 462 | break; | ||
| 463 | case Settings::ScalingFilter::Fsr: | ||
| 464 | LOG_WARNING( | ||
| 465 | Render_OpenGL, | ||
| 466 | "FidelityFX FSR Super Sampling is not supported in OpenGL, changing to ScaleForce"); | ||
| 467 | fragment_handle = present_scaleforce_fragment.handle; | ||
| 468 | break; | ||
| 469 | default: | ||
| 470 | fragment_handle = present_bilinear_fragment.handle; | ||
| 471 | break; | ||
| 472 | } | ||
| 473 | program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle); | ||
| 340 | glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, | 474 | glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, |
| 341 | ortho_matrix.data()); | 475 | ortho_matrix.data()); |
| 342 | 476 | ||
| @@ -370,6 +504,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 370 | scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / | 504 | scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / |
| 371 | static_cast<f32>(screen_info.texture.height); | 505 | static_cast<f32>(screen_info.texture.height); |
| 372 | } | 506 | } |
| 507 | if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa && | ||
| 508 | !screen_info.was_accelerated) { | ||
| 509 | scale_u /= Settings::values.resolution_info.up_factor; | ||
| 510 | scale_v /= Settings::values.resolution_info.up_factor; | ||
| 511 | } | ||
| 373 | 512 | ||
| 374 | const auto& screen = layout.screen; | 513 | const auto& screen = layout.screen; |
| 375 | const std::array vertices = { | 514 | const std::array vertices = { |
| @@ -380,47 +519,14 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 380 | }; | 519 | }; |
| 381 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); | 520 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); |
| 382 | 521 | ||
| 383 | // TODO: Signal state tracker about these changes | ||
| 384 | state_tracker.NotifyScreenDrawVertexArray(); | ||
| 385 | state_tracker.NotifyPolygonModes(); | ||
| 386 | state_tracker.NotifyViewport0(); | ||
| 387 | state_tracker.NotifyScissor0(); | ||
| 388 | state_tracker.NotifyColorMask(0); | ||
| 389 | state_tracker.NotifyBlend0(); | ||
| 390 | state_tracker.NotifyFramebuffer(); | ||
| 391 | state_tracker.NotifyFrontFace(); | ||
| 392 | state_tracker.NotifyCullTest(); | ||
| 393 | state_tracker.NotifyDepthTest(); | ||
| 394 | state_tracker.NotifyStencilTest(); | ||
| 395 | state_tracker.NotifyPolygonOffset(); | ||
| 396 | state_tracker.NotifyRasterizeEnable(); | ||
| 397 | state_tracker.NotifyFramebufferSRGB(); | ||
| 398 | state_tracker.NotifyLogicOp(); | ||
| 399 | state_tracker.NotifyClipControl(); | ||
| 400 | state_tracker.NotifyAlphaTest(); | ||
| 401 | |||
| 402 | state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | ||
| 403 | glEnable(GL_CULL_FACE); | ||
| 404 | if (screen_info.display_srgb) { | 522 | if (screen_info.display_srgb) { |
| 405 | glEnable(GL_FRAMEBUFFER_SRGB); | 523 | glEnable(GL_FRAMEBUFFER_SRGB); |
| 406 | } else { | 524 | } else { |
| 407 | glDisable(GL_FRAMEBUFFER_SRGB); | 525 | glDisable(GL_FRAMEBUFFER_SRGB); |
| 408 | } | 526 | } |
| 409 | glDisable(GL_COLOR_LOGIC_OP); | ||
| 410 | glDisable(GL_DEPTH_TEST); | ||
| 411 | glDisable(GL_STENCIL_TEST); | ||
| 412 | glDisable(GL_POLYGON_OFFSET_FILL); | ||
| 413 | glDisable(GL_RASTERIZER_DISCARD); | ||
| 414 | glDisable(GL_ALPHA_TEST); | ||
| 415 | glDisablei(GL_BLEND, 0); | ||
| 416 | glDisablei(GL_SCISSOR_TEST, 0); | 527 | glDisablei(GL_SCISSOR_TEST, 0); |
| 417 | glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); | ||
| 418 | glCullFace(GL_BACK); | ||
| 419 | glFrontFace(GL_CW); | ||
| 420 | glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); | ||
| 421 | glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), | 528 | glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), |
| 422 | static_cast<GLfloat>(layout.height)); | 529 | static_cast<GLfloat>(layout.height)); |
| 423 | glDepthRangeIndexed(0, 0.0, 0.0); | ||
| 424 | 530 | ||
| 425 | glEnableVertexAttribArray(PositionLocation); | 531 | glEnableVertexAttribArray(PositionLocation); |
| 426 | glEnableVertexAttribArray(TexCoordLocation); | 532 | glEnableVertexAttribArray(TexCoordLocation); |
| @@ -440,8 +546,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 440 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); | 546 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); |
| 441 | } | 547 | } |
| 442 | 548 | ||
| 443 | glBindTextureUnit(0, screen_info.display_texture); | 549 | if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { |
| 444 | glBindSampler(0, present_sampler.handle); | 550 | glBindSampler(0, present_sampler.handle); |
| 551 | } else { | ||
| 552 | glBindSampler(0, present_sampler_nn.handle); | ||
| 553 | } | ||
| 445 | 554 | ||
| 446 | glClear(GL_COLOR_BUFFER_BIT); | 555 | glClear(GL_COLOR_BUFFER_BIT); |
| 447 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 556 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index d455f572f..cda333cad 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -50,6 +50,7 @@ struct TextureInfo { | |||
| 50 | /// Structure used for storing information about the display target for the Switch screen | 50 | /// Structure used for storing information about the display target for the Switch screen |
| 51 | struct ScreenInfo { | 51 | struct ScreenInfo { |
| 52 | GLuint display_texture{}; | 52 | GLuint display_texture{}; |
| 53 | bool was_accelerated = false; | ||
| 53 | bool display_srgb{}; | 54 | bool display_srgb{}; |
| 54 | const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; | 55 | const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; |
| 55 | TextureInfo texture; | 56 | TextureInfo texture; |
| @@ -109,9 +110,15 @@ private: | |||
| 109 | 110 | ||
| 110 | // OpenGL object IDs | 111 | // OpenGL object IDs |
| 111 | OGLSampler present_sampler; | 112 | OGLSampler present_sampler; |
| 113 | OGLSampler present_sampler_nn; | ||
| 112 | OGLBuffer vertex_buffer; | 114 | OGLBuffer vertex_buffer; |
| 115 | OGLProgram fxaa_vertex; | ||
| 116 | OGLProgram fxaa_fragment; | ||
| 113 | OGLProgram present_vertex; | 117 | OGLProgram present_vertex; |
| 114 | OGLProgram present_fragment; | 118 | OGLProgram present_bilinear_fragment; |
| 119 | OGLProgram present_bicubic_fragment; | ||
| 120 | OGLProgram present_gaussian_fragment; | ||
| 121 | OGLProgram present_scaleforce_fragment; | ||
| 115 | OGLFramebuffer screenshot_framebuffer; | 122 | OGLFramebuffer screenshot_framebuffer; |
| 116 | 123 | ||
| 117 | // GPU address of the vertex buffer | 124 | // GPU address of the vertex buffer |
| @@ -119,6 +126,8 @@ private: | |||
| 119 | 126 | ||
| 120 | /// Display information for Switch screen | 127 | /// Display information for Switch screen |
| 121 | ScreenInfo screen_info; | 128 | ScreenInfo screen_info; |
| 129 | OGLTexture fxaa_texture; | ||
| 130 | OGLFramebuffer fxaa_framebuffer; | ||
| 122 | 131 | ||
| 123 | /// OpenGL framebuffer data | 132 | /// OpenGL framebuffer data |
| 124 | std::vector<u8> gl_framebuffer_data; | 133 | std::vector<u8> gl_framebuffer_data; |
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 6c1b2f063..b3884a4f5 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp | |||
| @@ -363,7 +363,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, | |||
| 363 | 363 | ||
| 364 | BlitImageHelper::~BlitImageHelper() = default; | 364 | BlitImageHelper::~BlitImageHelper() = default; |
| 365 | 365 | ||
| 366 | void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, | 366 | void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view, |
| 367 | const Region2D& dst_region, const Region2D& src_region, | 367 | const Region2D& dst_region, const Region2D& src_region, |
| 368 | Tegra::Engines::Fermi2D::Filter filter, | 368 | Tegra::Engines::Fermi2D::Filter filter, |
| 369 | Tegra::Engines::Fermi2D::Operation operation) { | 369 | Tegra::Engines::Fermi2D::Operation operation) { |
| @@ -373,9 +373,8 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV | |||
| 373 | .operation = operation, | 373 | .operation = operation, |
| 374 | }; | 374 | }; |
| 375 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | 375 | const VkPipelineLayout layout = *one_texture_pipeline_layout; |
| 376 | const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); | ||
| 377 | const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; | 376 | const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; |
| 378 | const VkPipeline pipeline = FindOrEmplacePipeline(key); | 377 | const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); |
| 379 | scheduler.RequestRenderpass(dst_framebuffer); | 378 | scheduler.RequestRenderpass(dst_framebuffer); |
| 380 | scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, | 379 | scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, |
| 381 | src_view](vk::CommandBuffer cmdbuf) { | 380 | src_view](vk::CommandBuffer cmdbuf) { |
| @@ -398,10 +397,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, | |||
| 398 | Tegra::Engines::Fermi2D::Operation operation) { | 397 | Tegra::Engines::Fermi2D::Operation operation) { |
| 399 | ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); | 398 | ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); |
| 400 | ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); | 399 | ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); |
| 401 | 400 | const BlitImagePipelineKey key{ | |
| 401 | .renderpass = dst_framebuffer->RenderPass(), | ||
| 402 | .operation = operation, | ||
| 403 | }; | ||
| 402 | const VkPipelineLayout layout = *two_textures_pipeline_layout; | 404 | const VkPipelineLayout layout = *two_textures_pipeline_layout; |
| 403 | const VkSampler sampler = *nearest_sampler; | 405 | const VkSampler sampler = *nearest_sampler; |
| 404 | const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); | 406 | const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key); |
| 405 | scheduler.RequestRenderpass(dst_framebuffer); | 407 | scheduler.RequestRenderpass(dst_framebuffer); |
| 406 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, | 408 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, |
| 407 | src_stencil_view, this](vk::CommandBuffer cmdbuf) { | 409 | src_stencil_view, this](vk::CommandBuffer cmdbuf) { |
| @@ -419,40 +421,45 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, | |||
| 419 | } | 421 | } |
| 420 | 422 | ||
| 421 | void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, | 423 | void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, |
| 422 | const ImageView& src_image_view) { | 424 | const ImageView& src_image_view, u32 up_scale, |
| 425 | u32 down_shift) { | ||
| 423 | ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass()); | 426 | ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass()); |
| 424 | Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view); | 427 | Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); |
| 425 | } | 428 | } |
| 426 | 429 | ||
| 427 | void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, | 430 | void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, |
| 428 | const ImageView& src_image_view) { | 431 | const ImageView& src_image_view, u32 up_scale, |
| 432 | u32 down_shift) { | ||
| 429 | ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); | 433 | ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); |
| 430 | Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); | 434 | Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); |
| 431 | } | 435 | } |
| 432 | 436 | ||
| 433 | void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, | 437 | void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, |
| 434 | const ImageView& src_image_view) { | 438 | const ImageView& src_image_view, u32 up_scale, |
| 439 | u32 down_shift) { | ||
| 435 | ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); | 440 | ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); |
| 436 | Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view); | 441 | Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); |
| 437 | } | 442 | } |
| 438 | 443 | ||
| 439 | void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, | 444 | void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, |
| 440 | const ImageView& src_image_view) { | 445 | const ImageView& src_image_view, u32 up_scale, |
| 446 | u32 down_shift) { | ||
| 441 | ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); | 447 | ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); |
| 442 | Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view); | 448 | Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); |
| 443 | } | 449 | } |
| 444 | 450 | ||
| 445 | void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | 451 | void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, |
| 446 | const ImageView& src_image_view) { | 452 | const ImageView& src_image_view, u32 up_scale, u32 down_shift) { |
| 447 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | 453 | const VkPipelineLayout layout = *one_texture_pipeline_layout; |
| 448 | const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); | 454 | const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); |
| 449 | const VkSampler sampler = *nearest_sampler; | 455 | const VkSampler sampler = *nearest_sampler; |
| 450 | const VkExtent2D extent{ | 456 | const VkExtent2D extent{ |
| 451 | .width = src_image_view.size.width, | 457 | .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), |
| 452 | .height = src_image_view.size.height, | 458 | .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), |
| 453 | }; | 459 | }; |
| 454 | scheduler.RequestRenderpass(dst_framebuffer); | 460 | scheduler.RequestRenderpass(dst_framebuffer); |
| 455 | scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { | 461 | scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift, |
| 462 | this](vk::CommandBuffer cmdbuf) { | ||
| 456 | const VkOffset2D offset{ | 463 | const VkOffset2D offset{ |
| 457 | .x = 0, | 464 | .x = 0, |
| 458 | .y = 0, | 465 | .y = 0, |
| @@ -488,7 +495,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb | |||
| 488 | scheduler.InvalidateState(); | 495 | scheduler.InvalidateState(); |
| 489 | } | 496 | } |
| 490 | 497 | ||
| 491 | VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) { | 498 | VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) { |
| 492 | const auto it = std::ranges::find(blit_color_keys, key); | 499 | const auto it = std::ranges::find(blit_color_keys, key); |
| 493 | if (it != blit_color_keys.end()) { | 500 | if (it != blit_color_keys.end()) { |
| 494 | return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)]; | 501 | return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)]; |
| @@ -542,12 +549,14 @@ VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& ke | |||
| 542 | return *blit_color_pipelines.back(); | 549 | return *blit_color_pipelines.back(); |
| 543 | } | 550 | } |
| 544 | 551 | ||
| 545 | VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { | 552 | VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key) { |
| 546 | if (blit_depth_stencil_pipeline) { | 553 | const auto it = std::ranges::find(blit_depth_stencil_keys, key); |
| 547 | return *blit_depth_stencil_pipeline; | 554 | if (it != blit_depth_stencil_keys.end()) { |
| 555 | return *blit_depth_stencil_pipelines[std::distance(blit_depth_stencil_keys.begin(), it)]; | ||
| 548 | } | 556 | } |
| 557 | blit_depth_stencil_keys.push_back(key); | ||
| 549 | const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag); | 558 | const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag); |
| 550 | blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({ | 559 | blit_depth_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ |
| 551 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | 560 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |
| 552 | .pNext = nullptr, | 561 | .pNext = nullptr, |
| 553 | .flags = 0, | 562 | .flags = 0, |
| @@ -560,15 +569,15 @@ VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { | |||
| 560 | .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | 569 | .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, |
| 561 | .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | 570 | .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, |
| 562 | .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | 571 | .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, |
| 563 | .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, | 572 | .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, |
| 564 | .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, | 573 | .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, |
| 565 | .layout = *two_textures_pipeline_layout, | 574 | .layout = *two_textures_pipeline_layout, |
| 566 | .renderPass = renderpass, | 575 | .renderPass = key.renderpass, |
| 567 | .subpass = 0, | 576 | .subpass = 0, |
| 568 | .basePipelineHandle = VK_NULL_HANDLE, | 577 | .basePipelineHandle = VK_NULL_HANDLE, |
| 569 | .basePipelineIndex = 0, | 578 | .basePipelineIndex = 0, |
| 570 | }); | 579 | })); |
| 571 | return *blit_depth_stencil_pipeline; | 580 | return *blit_depth_stencil_pipelines.back(); |
| 572 | } | 581 | } |
| 573 | 582 | ||
| 574 | void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { | 583 | void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { |
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 33ee095c1..d77f76678 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h | |||
| @@ -34,7 +34,7 @@ public: | |||
| 34 | StateTracker& state_tracker, DescriptorPool& descriptor_pool); | 34 | StateTracker& state_tracker, DescriptorPool& descriptor_pool); |
| 35 | ~BlitImageHelper(); | 35 | ~BlitImageHelper(); |
| 36 | 36 | ||
| 37 | void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, | 37 | void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, |
| 38 | const Region2D& dst_region, const Region2D& src_region, | 38 | const Region2D& dst_region, const Region2D& src_region, |
| 39 | Tegra::Engines::Fermi2D::Filter filter, | 39 | Tegra::Engines::Fermi2D::Filter filter, |
| 40 | Tegra::Engines::Fermi2D::Operation operation); | 40 | Tegra::Engines::Fermi2D::Operation operation); |
| @@ -44,21 +44,25 @@ public: | |||
| 44 | const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, | 44 | const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, |
| 45 | Tegra::Engines::Fermi2D::Operation operation); | 45 | Tegra::Engines::Fermi2D::Operation operation); |
| 46 | 46 | ||
| 47 | void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | 47 | void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, |
| 48 | u32 up_scale, u32 down_shift); | ||
| 48 | 49 | ||
| 49 | void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | 50 | void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, |
| 51 | u32 up_scale, u32 down_shift); | ||
| 50 | 52 | ||
| 51 | void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | 53 | void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, |
| 54 | u32 up_scale, u32 down_shift); | ||
| 52 | 55 | ||
| 53 | void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); | 56 | void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, |
| 57 | u32 up_scale, u32 down_shift); | ||
| 54 | 58 | ||
| 55 | private: | 59 | private: |
| 56 | void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | 60 | void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, |
| 57 | const ImageView& src_image_view); | 61 | const ImageView& src_image_view, u32 up_scale, u32 down_shift); |
| 58 | 62 | ||
| 59 | [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key); | 63 | [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key); |
| 60 | 64 | ||
| 61 | [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass); | 65 | [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); |
| 62 | 66 | ||
| 63 | void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); | 67 | void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); |
| 64 | 68 | ||
| @@ -84,7 +88,8 @@ private: | |||
| 84 | 88 | ||
| 85 | std::vector<BlitImagePipelineKey> blit_color_keys; | 89 | std::vector<BlitImagePipelineKey> blit_color_keys; |
| 86 | std::vector<vk::Pipeline> blit_color_pipelines; | 90 | std::vector<vk::Pipeline> blit_color_pipelines; |
| 87 | vk::Pipeline blit_depth_stencil_pipeline; | 91 | std::vector<BlitImagePipelineKey> blit_depth_stencil_keys; |
| 92 | std::vector<vk::Pipeline> blit_depth_stencil_pipelines; | ||
| 88 | vk::Pipeline convert_d32_to_r32_pipeline; | 93 | vk::Pipeline convert_d32_to_r32_pipeline; |
| 89 | vk::Pipeline convert_r32_to_d32_pipeline; | 94 | vk::Pipeline convert_r32_to_d32_pipeline; |
| 90 | vk::Pipeline convert_d16_to_r16_pipeline; | 95 | vk::Pipeline convert_d16_to_r16_pipeline; |
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 4847db6b6..11c160570 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 13 | #include "shader_recompiler/shader_info.h" | 14 | #include "shader_recompiler/shader_info.h" |
| 14 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 15 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 15 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 16 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| @@ -20,6 +21,8 @@ | |||
| 20 | 21 | ||
| 21 | namespace Vulkan { | 22 | namespace Vulkan { |
| 22 | 23 | ||
| 24 | using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS; | ||
| 25 | |||
| 23 | class DescriptorLayoutBuilder { | 26 | class DescriptorLayoutBuilder { |
| 24 | public: | 27 | public: |
| 25 | DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} | 28 | DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} |
| @@ -68,18 +71,28 @@ public: | |||
| 68 | } | 71 | } |
| 69 | 72 | ||
| 70 | vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { | 73 | vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { |
| 74 | using Shader::Backend::SPIRV::RescalingLayout; | ||
| 75 | const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u; | ||
| 76 | const VkPushConstantRange range{ | ||
| 77 | .stageFlags = static_cast<VkShaderStageFlags>( | ||
| 78 | is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS), | ||
| 79 | .offset = 0, | ||
| 80 | .size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset, | ||
| 81 | }; | ||
| 71 | return device->GetLogical().CreatePipelineLayout({ | 82 | return device->GetLogical().CreatePipelineLayout({ |
| 72 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | 83 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, |
| 73 | .pNext = nullptr, | 84 | .pNext = nullptr, |
| 74 | .flags = 0, | 85 | .flags = 0, |
| 75 | .setLayoutCount = descriptor_set_layout ? 1U : 0U, | 86 | .setLayoutCount = descriptor_set_layout ? 1U : 0U, |
| 76 | .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout, | 87 | .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout, |
| 77 | .pushConstantRangeCount = 0, | 88 | .pushConstantRangeCount = 1, |
| 78 | .pPushConstantRanges = nullptr, | 89 | .pPushConstantRanges = &range, |
| 79 | }); | 90 | }); |
| 80 | } | 91 | } |
| 81 | 92 | ||
| 82 | void Add(const Shader::Info& info, VkShaderStageFlags stage) { | 93 | void Add(const Shader::Info& info, VkShaderStageFlags stage) { |
| 94 | is_compute |= (stage & VK_SHADER_STAGE_COMPUTE_BIT) != 0; | ||
| 95 | |||
| 83 | Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); | 96 | Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); |
| 84 | Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); | 97 | Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); |
| 85 | Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); | 98 | Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); |
| @@ -115,6 +128,7 @@ private: | |||
| 115 | } | 128 | } |
| 116 | 129 | ||
| 117 | const Device* device{}; | 130 | const Device* device{}; |
| 131 | bool is_compute{}; | ||
| 118 | boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings; | 132 | boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings; |
| 119 | boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries; | 133 | boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries; |
| 120 | u32 binding{}; | 134 | u32 binding{}; |
| @@ -122,31 +136,68 @@ private: | |||
| 122 | size_t offset{}; | 136 | size_t offset{}; |
| 123 | }; | 137 | }; |
| 124 | 138 | ||
| 125 | inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, | 139 | class RescalingPushConstant { |
| 126 | const ImageId*& image_view_ids, TextureCache& texture_cache, | 140 | public: |
| 127 | VKUpdateDescriptorQueue& update_descriptor_queue) { | 141 | explicit RescalingPushConstant() noexcept {} |
| 128 | for (const auto& desc : info.texture_buffer_descriptors) { | 142 | |
| 129 | image_view_ids += desc.count; | 143 | void PushTexture(bool is_rescaled) noexcept { |
| 144 | *texture_ptr |= is_rescaled ? texture_bit : 0u; | ||
| 145 | texture_bit <<= 1u; | ||
| 146 | if (texture_bit == 0u) { | ||
| 147 | texture_bit = 1u; | ||
| 148 | ++texture_ptr; | ||
| 149 | } | ||
| 130 | } | 150 | } |
| 131 | for (const auto& desc : info.image_buffer_descriptors) { | 151 | |
| 132 | image_view_ids += desc.count; | 152 | void PushImage(bool is_rescaled) noexcept { |
| 153 | *image_ptr |= is_rescaled ? image_bit : 0u; | ||
| 154 | image_bit <<= 1u; | ||
| 155 | if (image_bit == 0u) { | ||
| 156 | image_bit = 1u; | ||
| 157 | ++image_ptr; | ||
| 158 | } | ||
| 133 | } | 159 | } |
| 160 | |||
| 161 | const std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS>& Data() const noexcept { | ||
| 162 | return words; | ||
| 163 | } | ||
| 164 | |||
| 165 | private: | ||
| 166 | std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS> words{}; | ||
| 167 | u32* texture_ptr{words.data()}; | ||
| 168 | u32* image_ptr{words.data() + Shader::Backend::SPIRV::NUM_TEXTURE_SCALING_WORDS}; | ||
| 169 | u32 texture_bit{1u}; | ||
| 170 | u32 image_bit{1u}; | ||
| 171 | }; | ||
| 172 | |||
| 173 | inline void PushImageDescriptors(TextureCache& texture_cache, | ||
| 174 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 175 | const Shader::Info& info, RescalingPushConstant& rescaling, | ||
| 176 | const VkSampler*& samplers, | ||
| 177 | const VideoCommon::ImageViewInOut*& views) { | ||
| 178 | const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); | ||
| 179 | const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); | ||
| 180 | views += num_texture_buffers; | ||
| 181 | views += num_image_buffers; | ||
| 134 | for (const auto& desc : info.texture_descriptors) { | 182 | for (const auto& desc : info.texture_descriptors) { |
| 135 | for (u32 index = 0; index < desc.count; ++index) { | 183 | for (u32 index = 0; index < desc.count; ++index) { |
| 184 | const VideoCommon::ImageViewId image_view_id{(views++)->id}; | ||
| 136 | const VkSampler sampler{*(samplers++)}; | 185 | const VkSampler sampler{*(samplers++)}; |
| 137 | ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; | 186 | ImageView& image_view{texture_cache.GetImageView(image_view_id)}; |
| 138 | const VkImageView vk_image_view{image_view.Handle(desc.type)}; | 187 | const VkImageView vk_image_view{image_view.Handle(desc.type)}; |
| 139 | update_descriptor_queue.AddSampledImage(vk_image_view, sampler); | 188 | update_descriptor_queue.AddSampledImage(vk_image_view, sampler); |
| 189 | rescaling.PushTexture(texture_cache.IsRescaling(image_view)); | ||
| 140 | } | 190 | } |
| 141 | } | 191 | } |
| 142 | for (const auto& desc : info.image_descriptors) { | 192 | for (const auto& desc : info.image_descriptors) { |
| 143 | for (u32 index = 0; index < desc.count; ++index) { | 193 | for (u32 index = 0; index < desc.count; ++index) { |
| 144 | ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; | 194 | ImageView& image_view{texture_cache.GetImageView((views++)->id)}; |
| 145 | if (desc.is_written) { | 195 | if (desc.is_written) { |
| 146 | texture_cache.MarkModification(image_view.image_id); | 196 | texture_cache.MarkModification(image_view.image_id); |
| 147 | } | 197 | } |
| 148 | const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; | 198 | const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; |
| 149 | update_descriptor_queue.AddImage(vk_image_view); | 199 | update_descriptor_queue.AddImage(vk_image_view); |
| 200 | rescaling.PushImage(texture_cache.IsRescaling(image_view)); | ||
| 150 | } | 201 | } |
| 151 | } | 202 | } |
| 152 | } | 203 | } |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 888bc7392..1e447e621 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -12,14 +12,22 @@ | |||
| 12 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "common/math_util.h" | 14 | #include "common/math_util.h" |
| 15 | #include "common/settings.h" | ||
| 15 | #include "core/core.h" | 16 | #include "core/core.h" |
| 16 | #include "core/frontend/emu_window.h" | 17 | #include "core/frontend/emu_window.h" |
| 17 | #include "core/memory.h" | 18 | #include "core/memory.h" |
| 18 | #include "video_core/gpu.h" | 19 | #include "video_core/gpu.h" |
| 20 | #include "video_core/host_shaders/fxaa_frag_spv.h" | ||
| 21 | #include "video_core/host_shaders/fxaa_vert_spv.h" | ||
| 22 | #include "video_core/host_shaders/present_bicubic_frag_spv.h" | ||
| 23 | #include "video_core/host_shaders/present_gaussian_frag_spv.h" | ||
| 19 | #include "video_core/host_shaders/vulkan_present_frag_spv.h" | 24 | #include "video_core/host_shaders/vulkan_present_frag_spv.h" |
| 25 | #include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" | ||
| 26 | #include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" | ||
| 20 | #include "video_core/host_shaders/vulkan_present_vert_spv.h" | 27 | #include "video_core/host_shaders/vulkan_present_vert_spv.h" |
| 21 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 28 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 22 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 29 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| 30 | #include "video_core/renderer_vulkan/vk_fsr.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 31 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| 24 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 32 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 25 | #include "video_core/renderer_vulkan/vk_shader_util.h" | 33 | #include "video_core/renderer_vulkan/vk_shader_util.h" |
| @@ -144,8 +152,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 144 | scheduler.Wait(resource_ticks[image_index]); | 152 | scheduler.Wait(resource_ticks[image_index]); |
| 145 | resource_ticks[image_index] = scheduler.CurrentTick(); | 153 | resource_ticks[image_index] = scheduler.CurrentTick(); |
| 146 | 154 | ||
| 147 | UpdateDescriptorSet(image_index, | 155 | VkImageView source_image_view = |
| 148 | use_accelerated ? screen_info.image_view : *raw_image_views[image_index]); | 156 | use_accelerated ? screen_info.image_view : *raw_image_views[image_index]; |
| 149 | 157 | ||
| 150 | BufferData data; | 158 | BufferData data; |
| 151 | SetUniformData(data, layout); | 159 | SetUniformData(data, layout); |
| @@ -222,9 +230,134 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 222 | read_barrier); | 230 | read_barrier); |
| 223 | cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); | 231 | cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); |
| 224 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | 232 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 225 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); | 233 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | |
| 234 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 235 | 0, write_barrier); | ||
| 226 | }); | 236 | }); |
| 227 | } | 237 | } |
| 238 | |||
| 239 | const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); | ||
| 240 | if (use_accelerated && anti_alias_pass != Settings::AntiAliasing::None) { | ||
| 241 | UpdateAADescriptorSet(image_index, source_image_view, false); | ||
| 242 | const u32 up_scale = Settings::values.resolution_info.up_scale; | ||
| 243 | const u32 down_shift = Settings::values.resolution_info.down_shift; | ||
| 244 | VkExtent2D size{ | ||
| 245 | .width = (up_scale * framebuffer.width) >> down_shift, | ||
| 246 | .height = (up_scale * framebuffer.height) >> down_shift, | ||
| 247 | }; | ||
| 248 | scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) { | ||
| 249 | const VkImageMemoryBarrier base_barrier{ | ||
| 250 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 251 | .pNext = nullptr, | ||
| 252 | .srcAccessMask = 0, | ||
| 253 | .dstAccessMask = 0, | ||
| 254 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 255 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 256 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 257 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 258 | .image = {}, | ||
| 259 | .subresourceRange = | ||
| 260 | { | ||
| 261 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 262 | .baseMipLevel = 0, | ||
| 263 | .levelCount = 1, | ||
| 264 | .baseArrayLayer = 0, | ||
| 265 | .layerCount = 1, | ||
| 266 | }, | ||
| 267 | }; | ||
| 268 | |||
| 269 | { | ||
| 270 | VkImageMemoryBarrier fsr_write_barrier = base_barrier; | ||
| 271 | fsr_write_barrier.image = *aa_image; | ||
| 272 | fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; | ||
| 273 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 274 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, fsr_write_barrier); | ||
| 275 | } | ||
| 276 | |||
| 277 | const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; | ||
| 278 | const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; | ||
| 279 | const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; | ||
| 280 | const VkClearValue clear_color{ | ||
| 281 | .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, | ||
| 282 | }; | ||
| 283 | const VkRenderPassBeginInfo renderpass_bi{ | ||
| 284 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, | ||
| 285 | .pNext = nullptr, | ||
| 286 | .renderPass = *aa_renderpass, | ||
| 287 | .framebuffer = *aa_framebuffer, | ||
| 288 | .renderArea = | ||
| 289 | { | ||
| 290 | .offset = {0, 0}, | ||
| 291 | .extent = size, | ||
| 292 | }, | ||
| 293 | .clearValueCount = 1, | ||
| 294 | .pClearValues = &clear_color, | ||
| 295 | }; | ||
| 296 | const VkViewport viewport{ | ||
| 297 | .x = 0.0f, | ||
| 298 | .y = 0.0f, | ||
| 299 | .width = static_cast<float>(size.width), | ||
| 300 | .height = static_cast<float>(size.height), | ||
| 301 | .minDepth = 0.0f, | ||
| 302 | .maxDepth = 1.0f, | ||
| 303 | }; | ||
| 304 | const VkRect2D scissor{ | ||
| 305 | .offset = {0, 0}, | ||
| 306 | .extent = size, | ||
| 307 | }; | ||
| 308 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); | ||
| 309 | switch (anti_alias_pass) { | ||
| 310 | case Settings::AntiAliasing::Fxaa: | ||
| 311 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline); | ||
| 312 | break; | ||
| 313 | default: | ||
| 314 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline); | ||
| 315 | break; | ||
| 316 | } | ||
| 317 | cmdbuf.SetViewport(0, viewport); | ||
| 318 | cmdbuf.SetScissor(0, scissor); | ||
| 319 | |||
| 320 | cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); | ||
| 321 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, | ||
| 322 | aa_descriptor_sets[image_index], {}); | ||
| 323 | cmdbuf.Draw(4, 1, 0, 0); | ||
| 324 | cmdbuf.EndRenderPass(); | ||
| 325 | |||
| 326 | { | ||
| 327 | VkImageMemoryBarrier blit_read_barrier = base_barrier; | ||
| 328 | blit_read_barrier.image = *aa_image; | ||
| 329 | blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; | ||
| 330 | blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | ||
| 331 | |||
| 332 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | ||
| 333 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier); | ||
| 334 | } | ||
| 335 | }); | ||
| 336 | source_image_view = *aa_image_view; | ||
| 337 | } | ||
| 338 | |||
| 339 | if (fsr) { | ||
| 340 | auto crop_rect = framebuffer.crop_rect; | ||
| 341 | if (crop_rect.GetWidth() == 0) { | ||
| 342 | crop_rect.right = framebuffer.width; | ||
| 343 | } | ||
| 344 | if (crop_rect.GetHeight() == 0) { | ||
| 345 | crop_rect.bottom = framebuffer.height; | ||
| 346 | } | ||
| 347 | crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); | ||
| 348 | VkExtent2D fsr_input_size{ | ||
| 349 | .width = Settings::values.resolution_info.ScaleUp(framebuffer.width), | ||
| 350 | .height = Settings::values.resolution_info.ScaleUp(framebuffer.height), | ||
| 351 | }; | ||
| 352 | VkImageView fsr_image_view = | ||
| 353 | fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); | ||
| 354 | UpdateDescriptorSet(image_index, fsr_image_view, true); | ||
| 355 | } else { | ||
| 356 | const bool is_nn = | ||
| 357 | Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; | ||
| 358 | UpdateDescriptorSet(image_index, source_image_view, is_nn); | ||
| 359 | } | ||
| 360 | |||
| 228 | scheduler.Record( | 361 | scheduler.Record( |
| 229 | [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) { | 362 | [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) { |
| 230 | const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; | 363 | const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; |
| @@ -258,8 +391,28 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 258 | .offset = {0, 0}, | 391 | .offset = {0, 0}, |
| 259 | .extent = size, | 392 | .extent = size, |
| 260 | }; | 393 | }; |
| 394 | const auto filter = Settings::values.scaling_filter.GetValue(); | ||
| 261 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); | 395 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); |
| 262 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); | 396 | switch (filter) { |
| 397 | case Settings::ScalingFilter::NearestNeighbor: | ||
| 398 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); | ||
| 399 | break; | ||
| 400 | case Settings::ScalingFilter::Bilinear: | ||
| 401 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); | ||
| 402 | break; | ||
| 403 | case Settings::ScalingFilter::Bicubic: | ||
| 404 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline); | ||
| 405 | break; | ||
| 406 | case Settings::ScalingFilter::Gaussian: | ||
| 407 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *gaussian_pipeline); | ||
| 408 | break; | ||
| 409 | case Settings::ScalingFilter::ScaleForce: | ||
| 410 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline); | ||
| 411 | break; | ||
| 412 | default: | ||
| 413 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); | ||
| 414 | break; | ||
| 415 | } | ||
| 263 | cmdbuf.SetViewport(0, viewport); | 416 | cmdbuf.SetViewport(0, viewport); |
| 264 | cmdbuf.SetScissor(0, scissor); | 417 | cmdbuf.SetScissor(0, scissor); |
| 265 | 418 | ||
| @@ -281,11 +434,16 @@ VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& frameb | |||
| 281 | } | 434 | } |
| 282 | 435 | ||
| 283 | vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { | 436 | vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { |
| 437 | return CreateFramebuffer(image_view, extent, renderpass); | ||
| 438 | } | ||
| 439 | |||
| 440 | vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent, | ||
| 441 | vk::RenderPass& rd) { | ||
| 284 | return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{ | 442 | return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{ |
| 285 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | 443 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, |
| 286 | .pNext = nullptr, | 444 | .pNext = nullptr, |
| 287 | .flags = 0, | 445 | .flags = 0, |
| 288 | .renderPass = *renderpass, | 446 | .renderPass = *rd, |
| 289 | .attachmentCount = 1, | 447 | .attachmentCount = 1, |
| 290 | .pAttachments = &image_view, | 448 | .pAttachments = &image_view, |
| 291 | .width = extent.width, | 449 | .width = extent.width, |
| @@ -308,9 +466,21 @@ void VKBlitScreen::CreateDynamicResources() { | |||
| 308 | CreateRenderPass(); | 466 | CreateRenderPass(); |
| 309 | CreateFramebuffers(); | 467 | CreateFramebuffers(); |
| 310 | CreateGraphicsPipeline(); | 468 | CreateGraphicsPipeline(); |
| 469 | fsr.reset(); | ||
| 470 | if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { | ||
| 471 | CreateFSR(); | ||
| 472 | } | ||
| 311 | } | 473 | } |
| 312 | 474 | ||
| 313 | void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { | 475 | void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { |
| 476 | if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { | ||
| 477 | if (!fsr) { | ||
| 478 | CreateFSR(); | ||
| 479 | } | ||
| 480 | } else { | ||
| 481 | fsr.reset(); | ||
| 482 | } | ||
| 483 | |||
| 314 | if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) { | 484 | if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) { |
| 315 | return; | 485 | return; |
| 316 | } | 486 | } |
| @@ -324,7 +494,16 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) | |||
| 324 | 494 | ||
| 325 | void VKBlitScreen::CreateShaders() { | 495 | void VKBlitScreen::CreateShaders() { |
| 326 | vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); | 496 | vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); |
| 327 | fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); | 497 | fxaa_vertex_shader = BuildShader(device, FXAA_VERT_SPV); |
| 498 | fxaa_fragment_shader = BuildShader(device, FXAA_FRAG_SPV); | ||
| 499 | bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); | ||
| 500 | bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); | ||
| 501 | gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); | ||
| 502 | if (device.IsFloat16Supported()) { | ||
| 503 | scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV); | ||
| 504 | } else { | ||
| 505 | scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV); | ||
| 506 | } | ||
| 328 | } | 507 | } |
| 329 | 508 | ||
| 330 | void VKBlitScreen::CreateSemaphores() { | 509 | void VKBlitScreen::CreateSemaphores() { |
| @@ -344,6 +523,13 @@ void VKBlitScreen::CreateDescriptorPool() { | |||
| 344 | }, | 523 | }, |
| 345 | }}; | 524 | }}; |
| 346 | 525 | ||
| 526 | const std::array<VkDescriptorPoolSize, 1> pool_sizes_aa{{ | ||
| 527 | { | ||
| 528 | .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 529 | .descriptorCount = static_cast<u32>(image_count * 2), | ||
| 530 | }, | ||
| 531 | }}; | ||
| 532 | |||
| 347 | const VkDescriptorPoolCreateInfo ci{ | 533 | const VkDescriptorPoolCreateInfo ci{ |
| 348 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, | 534 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, |
| 349 | .pNext = nullptr, | 535 | .pNext = nullptr, |
| @@ -353,19 +539,33 @@ void VKBlitScreen::CreateDescriptorPool() { | |||
| 353 | .pPoolSizes = pool_sizes.data(), | 539 | .pPoolSizes = pool_sizes.data(), |
| 354 | }; | 540 | }; |
| 355 | descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); | 541 | descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); |
| 542 | |||
| 543 | const VkDescriptorPoolCreateInfo ci_aa{ | ||
| 544 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, | ||
| 545 | .pNext = nullptr, | ||
| 546 | .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, | ||
| 547 | .maxSets = static_cast<u32>(image_count), | ||
| 548 | .poolSizeCount = static_cast<u32>(pool_sizes_aa.size()), | ||
| 549 | .pPoolSizes = pool_sizes_aa.data(), | ||
| 550 | }; | ||
| 551 | aa_descriptor_pool = device.GetLogical().CreateDescriptorPool(ci_aa); | ||
| 356 | } | 552 | } |
| 357 | 553 | ||
| 358 | void VKBlitScreen::CreateRenderPass() { | 554 | void VKBlitScreen::CreateRenderPass() { |
| 555 | renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat()); | ||
| 556 | } | ||
| 557 | |||
| 558 | vk::RenderPass VKBlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) { | ||
| 359 | const VkAttachmentDescription color_attachment{ | 559 | const VkAttachmentDescription color_attachment{ |
| 360 | .flags = 0, | 560 | .flags = 0, |
| 361 | .format = swapchain.GetImageViewFormat(), | 561 | .format = format, |
| 362 | .samples = VK_SAMPLE_COUNT_1_BIT, | 562 | .samples = VK_SAMPLE_COUNT_1_BIT, |
| 363 | .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, | 563 | .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, |
| 364 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | 564 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, |
| 365 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, | 565 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, |
| 366 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, | 566 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, |
| 367 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 567 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 368 | .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, | 568 | .finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL, |
| 369 | }; | 569 | }; |
| 370 | 570 | ||
| 371 | const VkAttachmentReference color_attachment_ref{ | 571 | const VkAttachmentReference color_attachment_ref{ |
| @@ -408,7 +608,7 @@ void VKBlitScreen::CreateRenderPass() { | |||
| 408 | .pDependencies = &dependency, | 608 | .pDependencies = &dependency, |
| 409 | }; | 609 | }; |
| 410 | 610 | ||
| 411 | renderpass = device.GetLogical().CreateRenderPass(renderpass_ci); | 611 | return device.GetLogical().CreateRenderPass(renderpass_ci); |
| 412 | } | 612 | } |
| 413 | 613 | ||
| 414 | void VKBlitScreen::CreateDescriptorSetLayout() { | 614 | void VKBlitScreen::CreateDescriptorSetLayout() { |
| @@ -429,6 +629,23 @@ void VKBlitScreen::CreateDescriptorSetLayout() { | |||
| 429 | }, | 629 | }, |
| 430 | }}; | 630 | }}; |
| 431 | 631 | ||
| 632 | const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings_aa{{ | ||
| 633 | { | ||
| 634 | .binding = 0, | ||
| 635 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 636 | .descriptorCount = 1, | ||
| 637 | .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, | ||
| 638 | .pImmutableSamplers = nullptr, | ||
| 639 | }, | ||
| 640 | { | ||
| 641 | .binding = 1, | ||
| 642 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 643 | .descriptorCount = 1, | ||
| 644 | .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, | ||
| 645 | .pImmutableSamplers = nullptr, | ||
| 646 | }, | ||
| 647 | }}; | ||
| 648 | |||
| 432 | const VkDescriptorSetLayoutCreateInfo ci{ | 649 | const VkDescriptorSetLayoutCreateInfo ci{ |
| 433 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | 650 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, |
| 434 | .pNext = nullptr, | 651 | .pNext = nullptr, |
| @@ -437,11 +654,21 @@ void VKBlitScreen::CreateDescriptorSetLayout() { | |||
| 437 | .pBindings = layout_bindings.data(), | 654 | .pBindings = layout_bindings.data(), |
| 438 | }; | 655 | }; |
| 439 | 656 | ||
| 657 | const VkDescriptorSetLayoutCreateInfo ci_aa{ | ||
| 658 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 659 | .pNext = nullptr, | ||
| 660 | .flags = 0, | ||
| 661 | .bindingCount = static_cast<u32>(layout_bindings_aa.size()), | ||
| 662 | .pBindings = layout_bindings_aa.data(), | ||
| 663 | }; | ||
| 664 | |||
| 440 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); | 665 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); |
| 666 | aa_descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci_aa); | ||
| 441 | } | 667 | } |
| 442 | 668 | ||
| 443 | void VKBlitScreen::CreateDescriptorSets() { | 669 | void VKBlitScreen::CreateDescriptorSets() { |
| 444 | const std::vector layouts(image_count, *descriptor_set_layout); | 670 | const std::vector layouts(image_count, *descriptor_set_layout); |
| 671 | const std::vector layouts_aa(image_count, *aa_descriptor_set_layout); | ||
| 445 | 672 | ||
| 446 | const VkDescriptorSetAllocateInfo ai{ | 673 | const VkDescriptorSetAllocateInfo ai{ |
| 447 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, | 674 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, |
| @@ -451,7 +678,16 @@ void VKBlitScreen::CreateDescriptorSets() { | |||
| 451 | .pSetLayouts = layouts.data(), | 678 | .pSetLayouts = layouts.data(), |
| 452 | }; | 679 | }; |
| 453 | 680 | ||
| 681 | const VkDescriptorSetAllocateInfo ai_aa{ | ||
| 682 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, | ||
| 683 | .pNext = nullptr, | ||
| 684 | .descriptorPool = *aa_descriptor_pool, | ||
| 685 | .descriptorSetCount = static_cast<u32>(image_count), | ||
| 686 | .pSetLayouts = layouts_aa.data(), | ||
| 687 | }; | ||
| 688 | |||
| 454 | descriptor_sets = descriptor_pool.Allocate(ai); | 689 | descriptor_sets = descriptor_pool.Allocate(ai); |
| 690 | aa_descriptor_sets = aa_descriptor_pool.Allocate(ai_aa); | ||
| 455 | } | 691 | } |
| 456 | 692 | ||
| 457 | void VKBlitScreen::CreatePipelineLayout() { | 693 | void VKBlitScreen::CreatePipelineLayout() { |
| @@ -464,11 +700,63 @@ void VKBlitScreen::CreatePipelineLayout() { | |||
| 464 | .pushConstantRangeCount = 0, | 700 | .pushConstantRangeCount = 0, |
| 465 | .pPushConstantRanges = nullptr, | 701 | .pPushConstantRanges = nullptr, |
| 466 | }; | 702 | }; |
| 703 | const VkPipelineLayoutCreateInfo ci_aa{ | ||
| 704 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 705 | .pNext = nullptr, | ||
| 706 | .flags = 0, | ||
| 707 | .setLayoutCount = 1, | ||
| 708 | .pSetLayouts = aa_descriptor_set_layout.address(), | ||
| 709 | .pushConstantRangeCount = 0, | ||
| 710 | .pPushConstantRanges = nullptr, | ||
| 711 | }; | ||
| 467 | pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); | 712 | pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); |
| 713 | aa_pipeline_layout = device.GetLogical().CreatePipelineLayout(ci_aa); | ||
| 468 | } | 714 | } |
| 469 | 715 | ||
| 470 | void VKBlitScreen::CreateGraphicsPipeline() { | 716 | void VKBlitScreen::CreateGraphicsPipeline() { |
| 471 | const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{ | 717 | const std::array<VkPipelineShaderStageCreateInfo, 2> bilinear_shader_stages{{ |
| 718 | { | ||
| 719 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 720 | .pNext = nullptr, | ||
| 721 | .flags = 0, | ||
| 722 | .stage = VK_SHADER_STAGE_VERTEX_BIT, | ||
| 723 | .module = *vertex_shader, | ||
| 724 | .pName = "main", | ||
| 725 | .pSpecializationInfo = nullptr, | ||
| 726 | }, | ||
| 727 | { | ||
| 728 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 729 | .pNext = nullptr, | ||
| 730 | .flags = 0, | ||
| 731 | .stage = VK_SHADER_STAGE_FRAGMENT_BIT, | ||
| 732 | .module = *bilinear_fragment_shader, | ||
| 733 | .pName = "main", | ||
| 734 | .pSpecializationInfo = nullptr, | ||
| 735 | }, | ||
| 736 | }}; | ||
| 737 | |||
| 738 | const std::array<VkPipelineShaderStageCreateInfo, 2> bicubic_shader_stages{{ | ||
| 739 | { | ||
| 740 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 741 | .pNext = nullptr, | ||
| 742 | .flags = 0, | ||
| 743 | .stage = VK_SHADER_STAGE_VERTEX_BIT, | ||
| 744 | .module = *vertex_shader, | ||
| 745 | .pName = "main", | ||
| 746 | .pSpecializationInfo = nullptr, | ||
| 747 | }, | ||
| 748 | { | ||
| 749 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 750 | .pNext = nullptr, | ||
| 751 | .flags = 0, | ||
| 752 | .stage = VK_SHADER_STAGE_FRAGMENT_BIT, | ||
| 753 | .module = *bicubic_fragment_shader, | ||
| 754 | .pName = "main", | ||
| 755 | .pSpecializationInfo = nullptr, | ||
| 756 | }, | ||
| 757 | }}; | ||
| 758 | |||
| 759 | const std::array<VkPipelineShaderStageCreateInfo, 2> gaussian_shader_stages{{ | ||
| 472 | { | 760 | { |
| 473 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | 761 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| 474 | .pNext = nullptr, | 762 | .pNext = nullptr, |
| @@ -483,7 +771,28 @@ void VKBlitScreen::CreateGraphicsPipeline() { | |||
| 483 | .pNext = nullptr, | 771 | .pNext = nullptr, |
| 484 | .flags = 0, | 772 | .flags = 0, |
| 485 | .stage = VK_SHADER_STAGE_FRAGMENT_BIT, | 773 | .stage = VK_SHADER_STAGE_FRAGMENT_BIT, |
| 486 | .module = *fragment_shader, | 774 | .module = *gaussian_fragment_shader, |
| 775 | .pName = "main", | ||
| 776 | .pSpecializationInfo = nullptr, | ||
| 777 | }, | ||
| 778 | }}; | ||
| 779 | |||
| 780 | const std::array<VkPipelineShaderStageCreateInfo, 2> scaleforce_shader_stages{{ | ||
| 781 | { | ||
| 782 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 783 | .pNext = nullptr, | ||
| 784 | .flags = 0, | ||
| 785 | .stage = VK_SHADER_STAGE_VERTEX_BIT, | ||
| 786 | .module = *vertex_shader, | ||
| 787 | .pName = "main", | ||
| 788 | .pSpecializationInfo = nullptr, | ||
| 789 | }, | ||
| 790 | { | ||
| 791 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 792 | .pNext = nullptr, | ||
| 793 | .flags = 0, | ||
| 794 | .stage = VK_SHADER_STAGE_FRAGMENT_BIT, | ||
| 795 | .module = *scaleforce_fragment_shader, | ||
| 487 | .pName = "main", | 796 | .pName = "main", |
| 488 | .pSpecializationInfo = nullptr, | 797 | .pSpecializationInfo = nullptr, |
| 489 | }, | 798 | }, |
| @@ -583,12 +892,12 @@ void VKBlitScreen::CreateGraphicsPipeline() { | |||
| 583 | .pDynamicStates = dynamic_states.data(), | 892 | .pDynamicStates = dynamic_states.data(), |
| 584 | }; | 893 | }; |
| 585 | 894 | ||
| 586 | const VkGraphicsPipelineCreateInfo pipeline_ci{ | 895 | const VkGraphicsPipelineCreateInfo bilinear_pipeline_ci{ |
| 587 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | 896 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |
| 588 | .pNext = nullptr, | 897 | .pNext = nullptr, |
| 589 | .flags = 0, | 898 | .flags = 0, |
| 590 | .stageCount = static_cast<u32>(shader_stages.size()), | 899 | .stageCount = static_cast<u32>(bilinear_shader_stages.size()), |
| 591 | .pStages = shader_stages.data(), | 900 | .pStages = bilinear_shader_stages.data(), |
| 592 | .pVertexInputState = &vertex_input_ci, | 901 | .pVertexInputState = &vertex_input_ci, |
| 593 | .pInputAssemblyState = &input_assembly_ci, | 902 | .pInputAssemblyState = &input_assembly_ci, |
| 594 | .pTessellationState = nullptr, | 903 | .pTessellationState = nullptr, |
| @@ -605,7 +914,76 @@ void VKBlitScreen::CreateGraphicsPipeline() { | |||
| 605 | .basePipelineIndex = 0, | 914 | .basePipelineIndex = 0, |
| 606 | }; | 915 | }; |
| 607 | 916 | ||
| 608 | pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci); | 917 | const VkGraphicsPipelineCreateInfo bicubic_pipeline_ci{ |
| 918 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 919 | .pNext = nullptr, | ||
| 920 | .flags = 0, | ||
| 921 | .stageCount = static_cast<u32>(bicubic_shader_stages.size()), | ||
| 922 | .pStages = bicubic_shader_stages.data(), | ||
| 923 | .pVertexInputState = &vertex_input_ci, | ||
| 924 | .pInputAssemblyState = &input_assembly_ci, | ||
| 925 | .pTessellationState = nullptr, | ||
| 926 | .pViewportState = &viewport_state_ci, | ||
| 927 | .pRasterizationState = &rasterization_ci, | ||
| 928 | .pMultisampleState = &multisampling_ci, | ||
| 929 | .pDepthStencilState = nullptr, | ||
| 930 | .pColorBlendState = &color_blend_ci, | ||
| 931 | .pDynamicState = &dynamic_state_ci, | ||
| 932 | .layout = *pipeline_layout, | ||
| 933 | .renderPass = *renderpass, | ||
| 934 | .subpass = 0, | ||
| 935 | .basePipelineHandle = 0, | ||
| 936 | .basePipelineIndex = 0, | ||
| 937 | }; | ||
| 938 | |||
| 939 | const VkGraphicsPipelineCreateInfo gaussian_pipeline_ci{ | ||
| 940 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 941 | .pNext = nullptr, | ||
| 942 | .flags = 0, | ||
| 943 | .stageCount = static_cast<u32>(gaussian_shader_stages.size()), | ||
| 944 | .pStages = gaussian_shader_stages.data(), | ||
| 945 | .pVertexInputState = &vertex_input_ci, | ||
| 946 | .pInputAssemblyState = &input_assembly_ci, | ||
| 947 | .pTessellationState = nullptr, | ||
| 948 | .pViewportState = &viewport_state_ci, | ||
| 949 | .pRasterizationState = &rasterization_ci, | ||
| 950 | .pMultisampleState = &multisampling_ci, | ||
| 951 | .pDepthStencilState = nullptr, | ||
| 952 | .pColorBlendState = &color_blend_ci, | ||
| 953 | .pDynamicState = &dynamic_state_ci, | ||
| 954 | .layout = *pipeline_layout, | ||
| 955 | .renderPass = *renderpass, | ||
| 956 | .subpass = 0, | ||
| 957 | .basePipelineHandle = 0, | ||
| 958 | .basePipelineIndex = 0, | ||
| 959 | }; | ||
| 960 | |||
| 961 | const VkGraphicsPipelineCreateInfo scaleforce_pipeline_ci{ | ||
| 962 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 963 | .pNext = nullptr, | ||
| 964 | .flags = 0, | ||
| 965 | .stageCount = static_cast<u32>(scaleforce_shader_stages.size()), | ||
| 966 | .pStages = scaleforce_shader_stages.data(), | ||
| 967 | .pVertexInputState = &vertex_input_ci, | ||
| 968 | .pInputAssemblyState = &input_assembly_ci, | ||
| 969 | .pTessellationState = nullptr, | ||
| 970 | .pViewportState = &viewport_state_ci, | ||
| 971 | .pRasterizationState = &rasterization_ci, | ||
| 972 | .pMultisampleState = &multisampling_ci, | ||
| 973 | .pDepthStencilState = nullptr, | ||
| 974 | .pColorBlendState = &color_blend_ci, | ||
| 975 | .pDynamicState = &dynamic_state_ci, | ||
| 976 | .layout = *pipeline_layout, | ||
| 977 | .renderPass = *renderpass, | ||
| 978 | .subpass = 0, | ||
| 979 | .basePipelineHandle = 0, | ||
| 980 | .basePipelineIndex = 0, | ||
| 981 | }; | ||
| 982 | |||
| 983 | bilinear_pipeline = device.GetLogical().CreateGraphicsPipeline(bilinear_pipeline_ci); | ||
| 984 | bicubic_pipeline = device.GetLogical().CreateGraphicsPipeline(bicubic_pipeline_ci); | ||
| 985 | gaussian_pipeline = device.GetLogical().CreateGraphicsPipeline(gaussian_pipeline_ci); | ||
| 986 | scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci); | ||
| 609 | } | 987 | } |
| 610 | 988 | ||
| 611 | void VKBlitScreen::CreateSampler() { | 989 | void VKBlitScreen::CreateSampler() { |
| @@ -614,8 +992,29 @@ void VKBlitScreen::CreateSampler() { | |||
| 614 | .pNext = nullptr, | 992 | .pNext = nullptr, |
| 615 | .flags = 0, | 993 | .flags = 0, |
| 616 | .magFilter = VK_FILTER_LINEAR, | 994 | .magFilter = VK_FILTER_LINEAR, |
| 995 | .minFilter = VK_FILTER_LINEAR, | ||
| 996 | .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, | ||
| 997 | .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | ||
| 998 | .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | ||
| 999 | .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | ||
| 1000 | .mipLodBias = 0.0f, | ||
| 1001 | .anisotropyEnable = VK_FALSE, | ||
| 1002 | .maxAnisotropy = 0.0f, | ||
| 1003 | .compareEnable = VK_FALSE, | ||
| 1004 | .compareOp = VK_COMPARE_OP_NEVER, | ||
| 1005 | .minLod = 0.0f, | ||
| 1006 | .maxLod = 0.0f, | ||
| 1007 | .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, | ||
| 1008 | .unnormalizedCoordinates = VK_FALSE, | ||
| 1009 | }; | ||
| 1010 | |||
| 1011 | const VkSamplerCreateInfo ci_nn{ | ||
| 1012 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, | ||
| 1013 | .pNext = nullptr, | ||
| 1014 | .flags = 0, | ||
| 1015 | .magFilter = VK_FILTER_NEAREST, | ||
| 617 | .minFilter = VK_FILTER_NEAREST, | 1016 | .minFilter = VK_FILTER_NEAREST, |
| 618 | .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, | 1017 | .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, |
| 619 | .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | 1018 | .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, |
| 620 | .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | 1019 | .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, |
| 621 | .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, | 1020 | .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, |
| @@ -631,6 +1030,7 @@ void VKBlitScreen::CreateSampler() { | |||
| 631 | }; | 1030 | }; |
| 632 | 1031 | ||
| 633 | sampler = device.GetLogical().CreateSampler(ci); | 1032 | sampler = device.GetLogical().CreateSampler(ci); |
| 1033 | nn_sampler = device.GetLogical().CreateSampler(ci_nn); | ||
| 634 | } | 1034 | } |
| 635 | 1035 | ||
| 636 | void VKBlitScreen::CreateFramebuffers() { | 1036 | void VKBlitScreen::CreateFramebuffers() { |
| @@ -639,7 +1039,7 @@ void VKBlitScreen::CreateFramebuffers() { | |||
| 639 | 1039 | ||
| 640 | for (std::size_t i = 0; i < image_count; ++i) { | 1040 | for (std::size_t i = 0; i < image_count; ++i) { |
| 641 | const VkImageView image_view{swapchain.GetImageViewIndex(i)}; | 1041 | const VkImageView image_view{swapchain.GetImageViewIndex(i)}; |
| 642 | framebuffers[i] = CreateFramebuffer(image_view, size); | 1042 | framebuffers[i] = CreateFramebuffer(image_view, size, renderpass); |
| 643 | } | 1043 | } |
| 644 | } | 1044 | } |
| 645 | 1045 | ||
| @@ -649,6 +1049,11 @@ void VKBlitScreen::ReleaseRawImages() { | |||
| 649 | } | 1049 | } |
| 650 | raw_images.clear(); | 1050 | raw_images.clear(); |
| 651 | raw_buffer_commits.clear(); | 1051 | raw_buffer_commits.clear(); |
| 1052 | |||
| 1053 | aa_image_view.reset(); | ||
| 1054 | aa_image.reset(); | ||
| 1055 | aa_commit = MemoryCommit{}; | ||
| 1056 | |||
| 652 | buffer.reset(); | 1057 | buffer.reset(); |
| 653 | buffer_commit = MemoryCommit{}; | 1058 | buffer_commit = MemoryCommit{}; |
| 654 | } | 1059 | } |
| @@ -675,8 +1080,11 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) | |||
| 675 | raw_image_views.resize(image_count); | 1080 | raw_image_views.resize(image_count); |
| 676 | raw_buffer_commits.resize(image_count); | 1081 | raw_buffer_commits.resize(image_count); |
| 677 | 1082 | ||
| 678 | for (size_t i = 0; i < image_count; ++i) { | 1083 | const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1, |
| 679 | raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ | 1084 | u32 down_shift = 0) { |
| 1085 | u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | ||
| 1086 | : VK_IMAGE_USAGE_TRANSFER_DST_BIT; | ||
| 1087 | return device.GetLogical().CreateImage(VkImageCreateInfo{ | ||
| 680 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | 1088 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, |
| 681 | .pNext = nullptr, | 1089 | .pNext = nullptr, |
| 682 | .flags = 0, | 1090 | .flags = 0, |
| @@ -684,26 +1092,30 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) | |||
| 684 | .format = GetFormat(framebuffer), | 1092 | .format = GetFormat(framebuffer), |
| 685 | .extent = | 1093 | .extent = |
| 686 | { | 1094 | { |
| 687 | .width = framebuffer.width, | 1095 | .width = (up_scale * framebuffer.width) >> down_shift, |
| 688 | .height = framebuffer.height, | 1096 | .height = (up_scale * framebuffer.height) >> down_shift, |
| 689 | .depth = 1, | 1097 | .depth = 1, |
| 690 | }, | 1098 | }, |
| 691 | .mipLevels = 1, | 1099 | .mipLevels = 1, |
| 692 | .arrayLayers = 1, | 1100 | .arrayLayers = 1, |
| 693 | .samples = VK_SAMPLE_COUNT_1_BIT, | 1101 | .samples = VK_SAMPLE_COUNT_1_BIT, |
| 694 | .tiling = VK_IMAGE_TILING_LINEAR, | 1102 | .tiling = used_on_framebuffer ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR, |
| 695 | .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, | 1103 | .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | extra_usages, |
| 696 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 1104 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 697 | .queueFamilyIndexCount = 0, | 1105 | .queueFamilyIndexCount = 0, |
| 698 | .pQueueFamilyIndices = nullptr, | 1106 | .pQueueFamilyIndices = nullptr, |
| 699 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 1107 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 700 | }); | 1108 | }); |
| 701 | raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal); | 1109 | }; |
| 702 | raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ | 1110 | const auto create_commit = [&](vk::Image& image) { |
| 1111 | return memory_allocator.Commit(image, MemoryUsage::DeviceLocal); | ||
| 1112 | }; | ||
| 1113 | const auto create_image_view = [&](vk::Image& image) { | ||
| 1114 | return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ | ||
| 703 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 1115 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 704 | .pNext = nullptr, | 1116 | .pNext = nullptr, |
| 705 | .flags = 0, | 1117 | .flags = 0, |
| 706 | .image = *raw_images[i], | 1118 | .image = *image, |
| 707 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | 1119 | .viewType = VK_IMAGE_VIEW_TYPE_2D, |
| 708 | .format = GetFormat(framebuffer), | 1120 | .format = GetFormat(framebuffer), |
| 709 | .components = | 1121 | .components = |
| @@ -722,10 +1134,211 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) | |||
| 722 | .layerCount = 1, | 1134 | .layerCount = 1, |
| 723 | }, | 1135 | }, |
| 724 | }); | 1136 | }); |
| 1137 | }; | ||
| 1138 | |||
| 1139 | for (size_t i = 0; i < image_count; ++i) { | ||
| 1140 | raw_images[i] = create_image(); | ||
| 1141 | raw_buffer_commits[i] = create_commit(raw_images[i]); | ||
| 1142 | raw_image_views[i] = create_image_view(raw_images[i]); | ||
| 725 | } | 1143 | } |
| 1144 | |||
| 1145 | // AA Resources | ||
| 1146 | const u32 up_scale = Settings::values.resolution_info.up_scale; | ||
| 1147 | const u32 down_shift = Settings::values.resolution_info.down_shift; | ||
| 1148 | aa_image = create_image(true, up_scale, down_shift); | ||
| 1149 | aa_commit = create_commit(aa_image); | ||
| 1150 | aa_image_view = create_image_view(aa_image); | ||
| 1151 | VkExtent2D size{ | ||
| 1152 | .width = (up_scale * framebuffer.width) >> down_shift, | ||
| 1153 | .height = (up_scale * framebuffer.height) >> down_shift, | ||
| 1154 | }; | ||
| 1155 | if (aa_renderpass) { | ||
| 1156 | aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); | ||
| 1157 | return; | ||
| 1158 | } | ||
| 1159 | aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false); | ||
| 1160 | aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); | ||
| 1161 | |||
| 1162 | const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{ | ||
| 1163 | { | ||
| 1164 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 1165 | .pNext = nullptr, | ||
| 1166 | .flags = 0, | ||
| 1167 | .stage = VK_SHADER_STAGE_VERTEX_BIT, | ||
| 1168 | .module = *fxaa_vertex_shader, | ||
| 1169 | .pName = "main", | ||
| 1170 | .pSpecializationInfo = nullptr, | ||
| 1171 | }, | ||
| 1172 | { | ||
| 1173 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 1174 | .pNext = nullptr, | ||
| 1175 | .flags = 0, | ||
| 1176 | .stage = VK_SHADER_STAGE_FRAGMENT_BIT, | ||
| 1177 | .module = *fxaa_fragment_shader, | ||
| 1178 | .pName = "main", | ||
| 1179 | .pSpecializationInfo = nullptr, | ||
| 1180 | }, | ||
| 1181 | }}; | ||
| 1182 | |||
| 1183 | const auto vertex_binding_description = ScreenRectVertex::GetDescription(); | ||
| 1184 | const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); | ||
| 1185 | |||
| 1186 | const VkPipelineVertexInputStateCreateInfo vertex_input_ci{ | ||
| 1187 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 1188 | .pNext = nullptr, | ||
| 1189 | .flags = 0, | ||
| 1190 | .vertexBindingDescriptionCount = 1, | ||
| 1191 | .pVertexBindingDescriptions = &vertex_binding_description, | ||
| 1192 | .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}, | ||
| 1193 | .pVertexAttributeDescriptions = vertex_attrs_description.data(), | ||
| 1194 | }; | ||
| 1195 | |||
| 1196 | const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ | ||
| 1197 | .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 1198 | .pNext = nullptr, | ||
| 1199 | .flags = 0, | ||
| 1200 | .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, | ||
| 1201 | .primitiveRestartEnable = VK_FALSE, | ||
| 1202 | }; | ||
| 1203 | |||
| 1204 | const VkPipelineViewportStateCreateInfo viewport_state_ci{ | ||
| 1205 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 1206 | .pNext = nullptr, | ||
| 1207 | .flags = 0, | ||
| 1208 | .viewportCount = 1, | ||
| 1209 | .pViewports = nullptr, | ||
| 1210 | .scissorCount = 1, | ||
| 1211 | .pScissors = nullptr, | ||
| 1212 | }; | ||
| 1213 | |||
| 1214 | const VkPipelineRasterizationStateCreateInfo rasterization_ci{ | ||
| 1215 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 1216 | .pNext = nullptr, | ||
| 1217 | .flags = 0, | ||
| 1218 | .depthClampEnable = VK_FALSE, | ||
| 1219 | .rasterizerDiscardEnable = VK_FALSE, | ||
| 1220 | .polygonMode = VK_POLYGON_MODE_FILL, | ||
| 1221 | .cullMode = VK_CULL_MODE_NONE, | ||
| 1222 | .frontFace = VK_FRONT_FACE_CLOCKWISE, | ||
| 1223 | .depthBiasEnable = VK_FALSE, | ||
| 1224 | .depthBiasConstantFactor = 0.0f, | ||
| 1225 | .depthBiasClamp = 0.0f, | ||
| 1226 | .depthBiasSlopeFactor = 0.0f, | ||
| 1227 | .lineWidth = 1.0f, | ||
| 1228 | }; | ||
| 1229 | |||
| 1230 | const VkPipelineMultisampleStateCreateInfo multisampling_ci{ | ||
| 1231 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 1232 | .pNext = nullptr, | ||
| 1233 | .flags = 0, | ||
| 1234 | .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, | ||
| 1235 | .sampleShadingEnable = VK_FALSE, | ||
| 1236 | .minSampleShading = 0.0f, | ||
| 1237 | .pSampleMask = nullptr, | ||
| 1238 | .alphaToCoverageEnable = VK_FALSE, | ||
| 1239 | .alphaToOneEnable = VK_FALSE, | ||
| 1240 | }; | ||
| 1241 | |||
| 1242 | const VkPipelineColorBlendAttachmentState color_blend_attachment{ | ||
| 1243 | .blendEnable = VK_FALSE, | ||
| 1244 | .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 1245 | .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 1246 | .colorBlendOp = VK_BLEND_OP_ADD, | ||
| 1247 | .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 1248 | .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, | ||
| 1249 | .alphaBlendOp = VK_BLEND_OP_ADD, | ||
| 1250 | .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | | ||
| 1251 | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, | ||
| 1252 | }; | ||
| 1253 | |||
| 1254 | const VkPipelineColorBlendStateCreateInfo color_blend_ci{ | ||
| 1255 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | ||
| 1256 | .pNext = nullptr, | ||
| 1257 | .flags = 0, | ||
| 1258 | .logicOpEnable = VK_FALSE, | ||
| 1259 | .logicOp = VK_LOGIC_OP_COPY, | ||
| 1260 | .attachmentCount = 1, | ||
| 1261 | .pAttachments = &color_blend_attachment, | ||
| 1262 | .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, | ||
| 1263 | }; | ||
| 1264 | |||
| 1265 | static constexpr std::array dynamic_states{ | ||
| 1266 | VK_DYNAMIC_STATE_VIEWPORT, | ||
| 1267 | VK_DYNAMIC_STATE_SCISSOR, | ||
| 1268 | }; | ||
| 1269 | const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ | ||
| 1270 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 1271 | .pNext = nullptr, | ||
| 1272 | .flags = 0, | ||
| 1273 | .dynamicStateCount = static_cast<u32>(dynamic_states.size()), | ||
| 1274 | .pDynamicStates = dynamic_states.data(), | ||
| 1275 | }; | ||
| 1276 | |||
| 1277 | const VkGraphicsPipelineCreateInfo fxaa_pipeline_ci{ | ||
| 1278 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 1279 | .pNext = nullptr, | ||
| 1280 | .flags = 0, | ||
| 1281 | .stageCount = static_cast<u32>(fxaa_shader_stages.size()), | ||
| 1282 | .pStages = fxaa_shader_stages.data(), | ||
| 1283 | .pVertexInputState = &vertex_input_ci, | ||
| 1284 | .pInputAssemblyState = &input_assembly_ci, | ||
| 1285 | .pTessellationState = nullptr, | ||
| 1286 | .pViewportState = &viewport_state_ci, | ||
| 1287 | .pRasterizationState = &rasterization_ci, | ||
| 1288 | .pMultisampleState = &multisampling_ci, | ||
| 1289 | .pDepthStencilState = nullptr, | ||
| 1290 | .pColorBlendState = &color_blend_ci, | ||
| 1291 | .pDynamicState = &dynamic_state_ci, | ||
| 1292 | .layout = *aa_pipeline_layout, | ||
| 1293 | .renderPass = *aa_renderpass, | ||
| 1294 | .subpass = 0, | ||
| 1295 | .basePipelineHandle = 0, | ||
| 1296 | .basePipelineIndex = 0, | ||
| 1297 | }; | ||
| 1298 | |||
| 1299 | // AA | ||
| 1300 | aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); | ||
| 726 | } | 1301 | } |
| 727 | 1302 | ||
| 728 | void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { | 1303 | void VKBlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, |
| 1304 | bool nn) const { | ||
| 1305 | const VkDescriptorImageInfo image_info{ | ||
| 1306 | .sampler = nn ? *nn_sampler : *sampler, | ||
| 1307 | .imageView = image_view, | ||
| 1308 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 1309 | }; | ||
| 1310 | |||
| 1311 | const VkWriteDescriptorSet sampler_write{ | ||
| 1312 | .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||
| 1313 | .pNext = nullptr, | ||
| 1314 | .dstSet = aa_descriptor_sets[image_index], | ||
| 1315 | .dstBinding = 0, | ||
| 1316 | .dstArrayElement = 0, | ||
| 1317 | .descriptorCount = 1, | ||
| 1318 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 1319 | .pImageInfo = &image_info, | ||
| 1320 | .pBufferInfo = nullptr, | ||
| 1321 | .pTexelBufferView = nullptr, | ||
| 1322 | }; | ||
| 1323 | |||
| 1324 | const VkWriteDescriptorSet sampler_write_2{ | ||
| 1325 | .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||
| 1326 | .pNext = nullptr, | ||
| 1327 | .dstSet = aa_descriptor_sets[image_index], | ||
| 1328 | .dstBinding = 1, | ||
| 1329 | .dstArrayElement = 0, | ||
| 1330 | .descriptorCount = 1, | ||
| 1331 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 1332 | .pImageInfo = &image_info, | ||
| 1333 | .pBufferInfo = nullptr, | ||
| 1334 | .pTexelBufferView = nullptr, | ||
| 1335 | }; | ||
| 1336 | |||
| 1337 | device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); | ||
| 1338 | } | ||
| 1339 | |||
| 1340 | void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, | ||
| 1341 | bool nn) const { | ||
| 729 | const VkDescriptorBufferInfo buffer_info{ | 1342 | const VkDescriptorBufferInfo buffer_info{ |
| 730 | .buffer = *buffer, | 1343 | .buffer = *buffer, |
| 731 | .offset = offsetof(BufferData, uniform), | 1344 | .offset = offsetof(BufferData, uniform), |
| @@ -746,7 +1359,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag | |||
| 746 | }; | 1359 | }; |
| 747 | 1360 | ||
| 748 | const VkDescriptorImageInfo image_info{ | 1361 | const VkDescriptorImageInfo image_info{ |
| 749 | .sampler = *sampler, | 1362 | .sampler = nn ? *nn_sampler : *sampler, |
| 750 | .imageView = image_view, | 1363 | .imageView = image_view, |
| 751 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | 1364 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 752 | }; | 1365 | }; |
| @@ -798,17 +1411,19 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi | |||
| 798 | UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0); | 1411 | UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0); |
| 799 | UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0); | 1412 | UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0); |
| 800 | 1413 | ||
| 801 | // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering | ||
| 802 | // (e.g. handheld mode) on a 1920x1080 framebuffer. | ||
| 803 | f32 scale_u = 1.0f; | 1414 | f32 scale_u = 1.0f; |
| 804 | f32 scale_v = 1.0f; | 1415 | f32 scale_v = 1.0f; |
| 805 | if (framebuffer_crop_rect.GetWidth() > 0) { | 1416 | // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering |
| 806 | scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / | 1417 | // (e.g. handheld mode) on a 1920x1080 framebuffer. |
| 807 | static_cast<f32>(screen_info.width); | 1418 | if (!fsr) { |
| 808 | } | 1419 | if (framebuffer_crop_rect.GetWidth() > 0) { |
| 809 | if (framebuffer_crop_rect.GetHeight() > 0) { | 1420 | scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / |
| 810 | scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / | 1421 | static_cast<f32>(screen_info.width); |
| 811 | static_cast<f32>(screen_info.height); | 1422 | } |
| 1423 | if (framebuffer_crop_rect.GetHeight() > 0) { | ||
| 1424 | scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / | ||
| 1425 | static_cast<f32>(screen_info.height); | ||
| 1426 | } | ||
| 812 | } | 1427 | } |
| 813 | 1428 | ||
| 814 | const auto& screen = layout.screen; | 1429 | const auto& screen = layout.screen; |
| @@ -822,6 +1437,15 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi | |||
| 822 | data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v); | 1437 | data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v); |
| 823 | } | 1438 | } |
| 824 | 1439 | ||
| 1440 | void VKBlitScreen::CreateFSR() { | ||
| 1441 | const auto& layout = render_window.GetFramebufferLayout(); | ||
| 1442 | const VkExtent2D fsr_size{ | ||
| 1443 | .width = layout.screen.GetWidth(), | ||
| 1444 | .height = layout.screen.GetHeight(), | ||
| 1445 | }; | ||
| 1446 | fsr = std::make_unique<FSR>(device, memory_allocator, image_count, fsr_size); | ||
| 1447 | } | ||
| 1448 | |||
| 825 | u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const { | 1449 | u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const { |
| 826 | return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; | 1450 | return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; |
| 827 | } | 1451 | } |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 430bcfbca..bbca71af3 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h | |||
| @@ -34,6 +34,7 @@ namespace Vulkan { | |||
| 34 | struct ScreenInfo; | 34 | struct ScreenInfo; |
| 35 | 35 | ||
| 36 | class Device; | 36 | class Device; |
| 37 | class FSR; | ||
| 37 | class RasterizerVulkan; | 38 | class RasterizerVulkan; |
| 38 | class VKScheduler; | 39 | class VKScheduler; |
| 39 | class VKSwapchain; | 40 | class VKSwapchain; |
| @@ -66,6 +67,9 @@ public: | |||
| 66 | [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, | 67 | [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, |
| 67 | VkExtent2D extent); | 68 | VkExtent2D extent); |
| 68 | 69 | ||
| 70 | [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, | ||
| 71 | VkExtent2D extent, vk::RenderPass& rd); | ||
| 72 | |||
| 69 | private: | 73 | private: |
| 70 | struct BufferData; | 74 | struct BufferData; |
| 71 | 75 | ||
| @@ -74,6 +78,7 @@ private: | |||
| 74 | void CreateSemaphores(); | 78 | void CreateSemaphores(); |
| 75 | void CreateDescriptorPool(); | 79 | void CreateDescriptorPool(); |
| 76 | void CreateRenderPass(); | 80 | void CreateRenderPass(); |
| 81 | vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true); | ||
| 77 | void CreateDescriptorSetLayout(); | 82 | void CreateDescriptorSetLayout(); |
| 78 | void CreateDescriptorSets(); | 83 | void CreateDescriptorSets(); |
| 79 | void CreatePipelineLayout(); | 84 | void CreatePipelineLayout(); |
| @@ -88,11 +93,14 @@ private: | |||
| 88 | void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); | 93 | void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); |
| 89 | void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); | 94 | void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); |
| 90 | 95 | ||
| 91 | void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; | 96 | void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; |
| 97 | void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; | ||
| 92 | void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; | 98 | void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; |
| 93 | void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, | 99 | void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, |
| 94 | const Layout::FramebufferLayout layout) const; | 100 | const Layout::FramebufferLayout layout) const; |
| 95 | 101 | ||
| 102 | void CreateFSR(); | ||
| 103 | |||
| 96 | u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; | 104 | u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; |
| 97 | u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, | 105 | u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, |
| 98 | std::size_t image_index) const; | 106 | std::size_t image_index) const; |
| @@ -107,14 +115,24 @@ private: | |||
| 107 | const VKScreenInfo& screen_info; | 115 | const VKScreenInfo& screen_info; |
| 108 | 116 | ||
| 109 | vk::ShaderModule vertex_shader; | 117 | vk::ShaderModule vertex_shader; |
| 110 | vk::ShaderModule fragment_shader; | 118 | vk::ShaderModule fxaa_vertex_shader; |
| 119 | vk::ShaderModule fxaa_fragment_shader; | ||
| 120 | vk::ShaderModule bilinear_fragment_shader; | ||
| 121 | vk::ShaderModule bicubic_fragment_shader; | ||
| 122 | vk::ShaderModule gaussian_fragment_shader; | ||
| 123 | vk::ShaderModule scaleforce_fragment_shader; | ||
| 111 | vk::DescriptorPool descriptor_pool; | 124 | vk::DescriptorPool descriptor_pool; |
| 112 | vk::DescriptorSetLayout descriptor_set_layout; | 125 | vk::DescriptorSetLayout descriptor_set_layout; |
| 113 | vk::PipelineLayout pipeline_layout; | 126 | vk::PipelineLayout pipeline_layout; |
| 114 | vk::Pipeline pipeline; | 127 | vk::Pipeline nearest_neightbor_pipeline; |
| 128 | vk::Pipeline bilinear_pipeline; | ||
| 129 | vk::Pipeline bicubic_pipeline; | ||
| 130 | vk::Pipeline gaussian_pipeline; | ||
| 131 | vk::Pipeline scaleforce_pipeline; | ||
| 115 | vk::RenderPass renderpass; | 132 | vk::RenderPass renderpass; |
| 116 | std::vector<vk::Framebuffer> framebuffers; | 133 | std::vector<vk::Framebuffer> framebuffers; |
| 117 | vk::DescriptorSets descriptor_sets; | 134 | vk::DescriptorSets descriptor_sets; |
| 135 | vk::Sampler nn_sampler; | ||
| 118 | vk::Sampler sampler; | 136 | vk::Sampler sampler; |
| 119 | 137 | ||
| 120 | vk::Buffer buffer; | 138 | vk::Buffer buffer; |
| @@ -126,8 +144,22 @@ private: | |||
| 126 | std::vector<vk::Image> raw_images; | 144 | std::vector<vk::Image> raw_images; |
| 127 | std::vector<vk::ImageView> raw_image_views; | 145 | std::vector<vk::ImageView> raw_image_views; |
| 128 | std::vector<MemoryCommit> raw_buffer_commits; | 146 | std::vector<MemoryCommit> raw_buffer_commits; |
| 147 | |||
| 148 | vk::DescriptorPool aa_descriptor_pool; | ||
| 149 | vk::DescriptorSetLayout aa_descriptor_set_layout; | ||
| 150 | vk::PipelineLayout aa_pipeline_layout; | ||
| 151 | vk::Pipeline aa_pipeline; | ||
| 152 | vk::RenderPass aa_renderpass; | ||
| 153 | vk::Framebuffer aa_framebuffer; | ||
| 154 | vk::DescriptorSets aa_descriptor_sets; | ||
| 155 | vk::Image aa_image; | ||
| 156 | vk::ImageView aa_image_view; | ||
| 157 | MemoryCommit aa_commit; | ||
| 158 | |||
| 129 | u32 raw_width = 0; | 159 | u32 raw_width = 0; |
| 130 | u32 raw_height = 0; | 160 | u32 raw_height = 0; |
| 161 | |||
| 162 | std::unique_ptr<FSR> fsr; | ||
| 131 | }; | 163 | }; |
| 132 | 164 | ||
| 133 | } // namespace Vulkan | 165 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 8ac58bc2f..5ffd93499 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -146,7 +146,7 @@ void BufferCacheRuntime::Finish() { | |||
| 146 | } | 146 | } |
| 147 | 147 | ||
| 148 | void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, | 148 | void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, |
| 149 | std::span<const VideoCommon::BufferCopy> copies) { | 149 | std::span<const VideoCommon::BufferCopy> copies, bool barrier) { |
| 150 | static constexpr VkMemoryBarrier READ_BARRIER{ | 150 | static constexpr VkMemoryBarrier READ_BARRIER{ |
| 151 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | 151 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| 152 | .pNext = nullptr, | 152 | .pNext = nullptr, |
| @@ -163,10 +163,42 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, | |||
| 163 | boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); | 163 | boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); |
| 164 | std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); | 164 | std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); |
| 165 | scheduler.RequestOutsideRenderPassOperationContext(); | 165 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 166 | scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { | 166 | scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { |
| 167 | if (barrier) { | ||
| 168 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 169 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); | ||
| 170 | } | ||
| 171 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); | ||
| 172 | if (barrier) { | ||
| 173 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 174 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); | ||
| 175 | } | ||
| 176 | }); | ||
| 177 | } | ||
| 178 | |||
| 179 | void BufferCacheRuntime::PreCopyBarrier() { | ||
| 180 | static constexpr VkMemoryBarrier READ_BARRIER{ | ||
| 181 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 182 | .pNext = nullptr, | ||
| 183 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 184 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 185 | }; | ||
| 186 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 187 | scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 167 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | 188 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 168 | 0, READ_BARRIER); | 189 | 0, READ_BARRIER); |
| 169 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); | 190 | }); |
| 191 | } | ||
| 192 | |||
| 193 | void BufferCacheRuntime::PostCopyBarrier() { | ||
| 194 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 195 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 196 | .pNext = nullptr, | ||
| 197 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 198 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 199 | }; | ||
| 200 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 201 | scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 170 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | 202 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, |
| 171 | 0, WRITE_BARRIER); | 203 | 0, WRITE_BARRIER); |
| 172 | }); | 204 | }); |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index c27402ff0..1ee0d8420 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -69,8 +69,12 @@ public: | |||
| 69 | 69 | ||
| 70 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | 70 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); |
| 71 | 71 | ||
| 72 | void PreCopyBarrier(); | ||
| 73 | |||
| 72 | void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, | 74 | void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, |
| 73 | std::span<const VideoCommon::BufferCopy> copies); | 75 | std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); |
| 76 | |||
| 77 | void PostCopyBarrier(); | ||
| 74 | 78 | ||
| 75 | void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value); | 79 | void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value); |
| 76 | 80 | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 44faf626a..de36bcdb7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | namespace Vulkan { | 22 | namespace Vulkan { |
| 23 | 23 | ||
| 24 | using Shader::ImageBufferDescriptor; | 24 | using Shader::ImageBufferDescriptor; |
| 25 | using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; | ||
| 25 | using Tegra::Texture::TexturePair; | 26 | using Tegra::Texture::TexturePair; |
| 26 | 27 | ||
| 27 | ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, | 28 | ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, |
| @@ -108,8 +109,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | |||
| 108 | texture_cache.SynchronizeComputeDescriptors(); | 109 | texture_cache.SynchronizeComputeDescriptors(); |
| 109 | 110 | ||
| 110 | static constexpr size_t max_elements = 64; | 111 | static constexpr size_t max_elements = 64; |
| 111 | std::array<ImageId, max_elements> image_view_ids; | 112 | boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views; |
| 112 | boost::container::static_vector<u32, max_elements> image_view_indices; | ||
| 113 | boost::container::static_vector<VkSampler, max_elements> samplers; | 113 | boost::container::static_vector<VkSampler, max_elements> samplers; |
| 114 | 114 | ||
| 115 | const auto& qmd{kepler_compute.launch_description}; | 115 | const auto& qmd{kepler_compute.launch_description}; |
| @@ -134,30 +134,37 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | |||
| 134 | } | 134 | } |
| 135 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | 135 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); |
| 136 | }}; | 136 | }}; |
| 137 | const auto add_image{[&](const auto& desc) { | 137 | const auto add_image{[&](const auto& desc, bool blacklist) { |
| 138 | for (u32 index = 0; index < desc.count; ++index) { | 138 | for (u32 index = 0; index < desc.count; ++index) { |
| 139 | const auto handle{read_handle(desc, index)}; | 139 | const auto handle{read_handle(desc, index)}; |
| 140 | image_view_indices.push_back(handle.first); | 140 | views.push_back({ |
| 141 | .index = handle.first, | ||
| 142 | .blacklist = blacklist, | ||
| 143 | .id = {}, | ||
| 144 | }); | ||
| 141 | } | 145 | } |
| 142 | }}; | 146 | }}; |
| 143 | std::ranges::for_each(info.texture_buffer_descriptors, add_image); | 147 | for (const auto& desc : info.texture_buffer_descriptors) { |
| 144 | std::ranges::for_each(info.image_buffer_descriptors, add_image); | 148 | add_image(desc, false); |
| 149 | } | ||
| 150 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 151 | add_image(desc, false); | ||
| 152 | } | ||
| 145 | for (const auto& desc : info.texture_descriptors) { | 153 | for (const auto& desc : info.texture_descriptors) { |
| 146 | for (u32 index = 0; index < desc.count; ++index) { | 154 | for (u32 index = 0; index < desc.count; ++index) { |
| 147 | const auto handle{read_handle(desc, index)}; | 155 | const auto handle{read_handle(desc, index)}; |
| 148 | image_view_indices.push_back(handle.first); | 156 | views.push_back({handle.first}); |
| 149 | 157 | ||
| 150 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); | 158 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); |
| 151 | samplers.push_back(sampler->Handle()); | 159 | samplers.push_back(sampler->Handle()); |
| 152 | } | 160 | } |
| 153 | } | 161 | } |
| 154 | std::ranges::for_each(info.image_descriptors, add_image); | 162 | for (const auto& desc : info.image_descriptors) { |
| 155 | 163 | add_image(desc, desc.is_written); | |
| 156 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | 164 | } |
| 157 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | 165 | texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); |
| 158 | 166 | ||
| 159 | buffer_cache.UnbindComputeTextureBuffers(); | 167 | buffer_cache.UnbindComputeTextureBuffers(); |
| 160 | ImageId* texture_buffer_ids{image_view_ids.data()}; | ||
| 161 | size_t index{}; | 168 | size_t index{}; |
| 162 | const auto add_buffer{[&](const auto& desc) { | 169 | const auto add_buffer{[&](const auto& desc) { |
| 163 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | 170 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; |
| @@ -166,11 +173,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | |||
| 166 | if constexpr (is_image) { | 173 | if constexpr (is_image) { |
| 167 | is_written = desc.is_written; | 174 | is_written = desc.is_written; |
| 168 | } | 175 | } |
| 169 | ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); | 176 | ImageView& image_view = texture_cache.GetImageView(views[index].id); |
| 170 | buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), | 177 | buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), |
| 171 | image_view.BufferSize(), image_view.format, | 178 | image_view.BufferSize(), image_view.format, |
| 172 | is_written, is_image); | 179 | is_written, is_image); |
| 173 | ++texture_buffer_ids; | ||
| 174 | ++index; | 180 | ++index; |
| 175 | } | 181 | } |
| 176 | }}; | 182 | }}; |
| @@ -180,9 +186,11 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | |||
| 180 | buffer_cache.UpdateComputeBuffers(); | 186 | buffer_cache.UpdateComputeBuffers(); |
| 181 | buffer_cache.BindHostComputeBuffers(); | 187 | buffer_cache.BindHostComputeBuffers(); |
| 182 | 188 | ||
| 189 | RescalingPushConstant rescaling; | ||
| 183 | const VkSampler* samplers_it{samplers.data()}; | 190 | const VkSampler* samplers_it{samplers.data()}; |
| 184 | const ImageId* views_it{image_view_ids.data()}; | 191 | const VideoCommon::ImageViewInOut* views_it{views.data()}; |
| 185 | PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); | 192 | PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it, |
| 193 | views_it); | ||
| 186 | 194 | ||
| 187 | if (!is_built.load(std::memory_order::relaxed)) { | 195 | if (!is_built.load(std::memory_order::relaxed)) { |
| 188 | // Wait for the pipeline to be built | 196 | // Wait for the pipeline to be built |
| @@ -192,11 +200,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | |||
| 192 | }); | 200 | }); |
| 193 | } | 201 | } |
| 194 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | 202 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; |
| 195 | scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { | 203 | const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty(); |
| 204 | scheduler.Record([this, descriptor_data, is_rescaling, | ||
| 205 | rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) { | ||
| 196 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | 206 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); |
| 197 | if (!descriptor_set_layout) { | 207 | if (!descriptor_set_layout) { |
| 198 | return; | 208 | return; |
| 199 | } | 209 | } |
| 210 | if (is_rescaling) { | ||
| 211 | cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 212 | RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), | ||
| 213 | rescaling_data.data()); | ||
| 214 | } | ||
| 200 | const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; | 215 | const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; |
| 201 | const vk::Device& dev{device.GetLogical()}; | 216 | const vk::Device& dev{device.GetLogical()}; |
| 202 | dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); | 217 | dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); |
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp new file mode 100644 index 000000000..73629d229 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp | |||
| @@ -0,0 +1,553 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cmath> | ||
| 6 | #include "common/bit_cast.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "common/div_ceil.h" | ||
| 9 | |||
| 10 | #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" | ||
| 11 | #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" | ||
| 12 | #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" | ||
| 13 | #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_fsr.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_shader_util.h" | ||
| 17 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 18 | |||
| 19 | namespace Vulkan { | ||
| 20 | namespace { | ||
| 21 | // Reimplementations of the constant generating functions in ffx_fsr1.h | ||
| 22 | // GCC generated a lot of warnings when using the official header. | ||
| 23 | u32 AU1_AH1_AF1(f32 f) { | ||
| 24 | static constexpr u32 base[512]{ | ||
| 25 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
| 26 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
| 27 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
| 28 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
| 29 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
| 30 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
| 31 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
| 32 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
| 33 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
| 34 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, | ||
| 35 | 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, | ||
| 36 | 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, | ||
| 37 | 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, | ||
| 38 | 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, | ||
| 39 | 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, | ||
| 40 | 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, | ||
| 41 | 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, | ||
| 42 | 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, | ||
| 43 | 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, | ||
| 44 | 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, | ||
| 45 | 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, | ||
| 46 | 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, | ||
| 47 | 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, | ||
| 48 | 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, | ||
| 49 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, | ||
| 50 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, | ||
| 51 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, | ||
| 52 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, | ||
| 53 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, | ||
| 54 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, | ||
| 55 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, | ||
| 56 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, | ||
| 57 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, | ||
| 58 | 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, | ||
| 59 | 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, | ||
| 60 | 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, | ||
| 61 | 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, | ||
| 62 | 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, | ||
| 63 | 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, | ||
| 64 | 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, | ||
| 65 | 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, | ||
| 66 | 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, | ||
| 67 | 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, | ||
| 68 | 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, | ||
| 69 | 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, | ||
| 70 | 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, | ||
| 71 | 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, | ||
| 72 | }; | ||
| 73 | static constexpr s8 shift[512]{ | ||
| 74 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 75 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 76 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 77 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 78 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 79 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 80 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, | ||
| 81 | 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, | ||
| 82 | 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, | ||
| 83 | 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 84 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 85 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 86 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 87 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 88 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 89 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 90 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 91 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 92 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 93 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 94 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 95 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 96 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 97 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, | ||
| 98 | 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, | ||
| 99 | 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, | ||
| 100 | 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 101 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 102 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 103 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 104 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 105 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 106 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 107 | 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, | ||
| 108 | 0x18, 0x18, | ||
| 109 | }; | ||
| 110 | const u32 u = Common::BitCast<u32>(f); | ||
| 111 | const u32 i = u >> 23; | ||
| 112 | return base[i] + ((u & 0x7fffff) >> shift[i]); | ||
| 113 | } | ||
| 114 | |||
| 115 | u32 AU1_AH2_AF2(f32 a[2]) { | ||
| 116 | return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); | ||
| 117 | } | ||
| 118 | |||
| 119 | void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX, | ||
| 120 | f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, | ||
| 121 | f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { | ||
| 122 | con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX); | ||
| 123 | con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY); | ||
| 124 | con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); | ||
| 125 | con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); | ||
| 126 | con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); | ||
| 127 | con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY); | ||
| 128 | con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); | ||
| 129 | con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY); | ||
| 130 | con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX); | ||
| 131 | con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); | ||
| 132 | con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); | ||
| 133 | con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); | ||
| 134 | con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX); | ||
| 135 | con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY); | ||
| 136 | con3[2] = con3[3] = 0; | ||
| 137 | } | ||
| 138 | |||
| 139 | void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], | ||
| 140 | f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, | ||
| 141 | f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, | ||
| 142 | f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { | ||
| 143 | FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, | ||
| 144 | inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); | ||
| 145 | con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + | ||
| 146 | inputOffsetInPixelsX); | ||
| 147 | con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + | ||
| 148 | inputOffsetInPixelsY); | ||
| 149 | } | ||
| 150 | |||
| 151 | void FsrRcasCon(u32* con, f32 sharpness) { | ||
| 152 | sharpness = std::exp2f(-sharpness); | ||
| 153 | f32 hSharp[2]{sharpness, sharpness}; | ||
| 154 | con[0] = Common::BitCast<u32>(sharpness); | ||
| 155 | con[1] = AU1_AH2_AF2(hSharp); | ||
| 156 | con[2] = 0; | ||
| 157 | con[3] = 0; | ||
| 158 | } | ||
| 159 | } // Anonymous namespace | ||
| 160 | |||
| 161 | FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, | ||
| 162 | VkExtent2D output_size_) | ||
| 163 | : device{device_}, memory_allocator{memory_allocator_}, image_count{image_count_}, | ||
| 164 | output_size{output_size_} { | ||
| 165 | |||
| 166 | CreateImages(); | ||
| 167 | CreateSampler(); | ||
| 168 | CreateShaders(); | ||
| 169 | CreateDescriptorPool(); | ||
| 170 | CreateDescriptorSetLayout(); | ||
| 171 | CreateDescriptorSets(); | ||
| 172 | CreatePipelineLayout(); | ||
| 173 | CreatePipeline(); | ||
| 174 | } | ||
| 175 | |||
| 176 | VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, | ||
| 177 | VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) { | ||
| 178 | |||
| 179 | UpdateDescriptorSet(image_index, image_view); | ||
| 180 | |||
| 181 | scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) { | ||
| 182 | const VkImageMemoryBarrier base_barrier{ | ||
| 183 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 184 | .pNext = nullptr, | ||
| 185 | .srcAccessMask = 0, | ||
| 186 | .dstAccessMask = 0, | ||
| 187 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 188 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 189 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 190 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 191 | .image = {}, | ||
| 192 | .subresourceRange = | ||
| 193 | { | ||
| 194 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 195 | .baseMipLevel = 0, | ||
| 196 | .levelCount = 1, | ||
| 197 | .baseArrayLayer = 0, | ||
| 198 | .layerCount = 1, | ||
| 199 | }, | ||
| 200 | }; | ||
| 201 | |||
| 202 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); | ||
| 203 | |||
| 204 | std::array<u32, 4 * 4> push_constants; | ||
| 205 | FsrEasuConOffset( | ||
| 206 | push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, | ||
| 207 | push_constants.data() + 12, | ||
| 208 | |||
| 209 | static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()), | ||
| 210 | static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height), | ||
| 211 | static_cast<f32>(output_size.width), static_cast<f32>(output_size.height), | ||
| 212 | static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top)); | ||
| 213 | cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); | ||
| 214 | |||
| 215 | { | ||
| 216 | VkImageMemoryBarrier fsr_write_barrier = base_barrier; | ||
| 217 | fsr_write_barrier.image = *images[image_index], | ||
| 218 | fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; | ||
| 219 | |||
| 220 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 221 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fsr_write_barrier); | ||
| 222 | } | ||
| 223 | |||
| 224 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, | ||
| 225 | descriptor_sets[image_index * 2], {}); | ||
| 226 | cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u), | ||
| 227 | Common::DivCeil(output_size.height, 16u), 1); | ||
| 228 | |||
| 229 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline); | ||
| 230 | |||
| 231 | FsrRcasCon(push_constants.data(), 0.25f); | ||
| 232 | cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); | ||
| 233 | |||
| 234 | { | ||
| 235 | std::array<VkImageMemoryBarrier, 2> barriers; | ||
| 236 | auto& fsr_read_barrier = barriers[0]; | ||
| 237 | auto& blit_write_barrier = barriers[1]; | ||
| 238 | |||
| 239 | fsr_read_barrier = base_barrier; | ||
| 240 | fsr_read_barrier.image = *images[image_index]; | ||
| 241 | fsr_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 242 | fsr_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | ||
| 243 | |||
| 244 | blit_write_barrier = base_barrier; | ||
| 245 | blit_write_barrier.image = *images[image_count + image_index]; | ||
| 246 | blit_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; | ||
| 247 | blit_write_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; | ||
| 248 | |||
| 249 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 250 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, {}, barriers); | ||
| 251 | } | ||
| 252 | |||
| 253 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, | ||
| 254 | descriptor_sets[image_index * 2 + 1], {}); | ||
| 255 | cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u), | ||
| 256 | Common::DivCeil(output_size.height, 16u), 1); | ||
| 257 | |||
| 258 | { | ||
| 259 | std::array<VkImageMemoryBarrier, 1> barriers; | ||
| 260 | auto& blit_read_barrier = barriers[0]; | ||
| 261 | |||
| 262 | blit_read_barrier = base_barrier; | ||
| 263 | blit_read_barrier.image = *images[image_count + image_index]; | ||
| 264 | blit_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 265 | blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | ||
| 266 | |||
| 267 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 268 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, {}, {}, barriers); | ||
| 269 | } | ||
| 270 | }); | ||
| 271 | |||
| 272 | return *image_views[image_count + image_index]; | ||
| 273 | } | ||
| 274 | |||
| 275 | void FSR::CreateDescriptorPool() { | ||
| 276 | const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ | ||
| 277 | { | ||
| 278 | .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 279 | .descriptorCount = static_cast<u32>(image_count * 2), | ||
| 280 | }, | ||
| 281 | { | ||
| 282 | .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | ||
| 283 | .descriptorCount = static_cast<u32>(image_count * 2), | ||
| 284 | }, | ||
| 285 | }}; | ||
| 286 | |||
| 287 | const VkDescriptorPoolCreateInfo ci{ | ||
| 288 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, | ||
| 289 | .pNext = nullptr, | ||
| 290 | .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, | ||
| 291 | .maxSets = static_cast<u32>(image_count * 2), | ||
| 292 | .poolSizeCount = static_cast<u32>(pool_sizes.size()), | ||
| 293 | .pPoolSizes = pool_sizes.data(), | ||
| 294 | }; | ||
| 295 | descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); | ||
| 296 | } | ||
| 297 | |||
| 298 | void FSR::CreateDescriptorSetLayout() { | ||
| 299 | const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{ | ||
| 300 | { | ||
| 301 | .binding = 0, | ||
| 302 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 303 | .descriptorCount = 1, | ||
| 304 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 305 | .pImmutableSamplers = sampler.address(), | ||
| 306 | }, | ||
| 307 | { | ||
| 308 | .binding = 1, | ||
| 309 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | ||
| 310 | .descriptorCount = 1, | ||
| 311 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 312 | .pImmutableSamplers = sampler.address(), | ||
| 313 | }, | ||
| 314 | }}; | ||
| 315 | |||
| 316 | const VkDescriptorSetLayoutCreateInfo ci{ | ||
| 317 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 318 | .pNext = nullptr, | ||
| 319 | .flags = 0, | ||
| 320 | .bindingCount = static_cast<u32>(layout_bindings.size()), | ||
| 321 | .pBindings = layout_bindings.data(), | ||
| 322 | }; | ||
| 323 | |||
| 324 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); | ||
| 325 | } | ||
| 326 | |||
| 327 | void FSR::CreateDescriptorSets() { | ||
| 328 | const u32 sets = static_cast<u32>(image_count * 2); | ||
| 329 | const std::vector layouts(sets, *descriptor_set_layout); | ||
| 330 | |||
| 331 | const VkDescriptorSetAllocateInfo ai{ | ||
| 332 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, | ||
| 333 | .pNext = nullptr, | ||
| 334 | .descriptorPool = *descriptor_pool, | ||
| 335 | .descriptorSetCount = sets, | ||
| 336 | .pSetLayouts = layouts.data(), | ||
| 337 | }; | ||
| 338 | |||
| 339 | descriptor_sets = descriptor_pool.Allocate(ai); | ||
| 340 | } | ||
| 341 | |||
| 342 | void FSR::CreateImages() { | ||
| 343 | images.resize(image_count * 2); | ||
| 344 | image_views.resize(image_count * 2); | ||
| 345 | buffer_commits.resize(image_count * 2); | ||
| 346 | |||
| 347 | for (size_t i = 0; i < image_count * 2; ++i) { | ||
| 348 | images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ | ||
| 349 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | ||
| 350 | .pNext = nullptr, | ||
| 351 | .flags = 0, | ||
| 352 | .imageType = VK_IMAGE_TYPE_2D, | ||
| 353 | .format = VK_FORMAT_R16G16B16A16_SFLOAT, | ||
| 354 | .extent = | ||
| 355 | { | ||
| 356 | .width = output_size.width, | ||
| 357 | .height = output_size.height, | ||
| 358 | .depth = 1, | ||
| 359 | }, | ||
| 360 | .mipLevels = 1, | ||
| 361 | .arrayLayers = 1, | ||
| 362 | .samples = VK_SAMPLE_COUNT_1_BIT, | ||
| 363 | .tiling = VK_IMAGE_TILING_OPTIMAL, | ||
| 364 | .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT | | ||
| 365 | VK_IMAGE_USAGE_SAMPLED_BIT, | ||
| 366 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 367 | .queueFamilyIndexCount = 0, | ||
| 368 | .pQueueFamilyIndices = nullptr, | ||
| 369 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 370 | }); | ||
| 371 | buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal); | ||
| 372 | image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ | ||
| 373 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||
| 374 | .pNext = nullptr, | ||
| 375 | .flags = 0, | ||
| 376 | .image = *images[i], | ||
| 377 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | ||
| 378 | .format = VK_FORMAT_R16G16B16A16_SFLOAT, | ||
| 379 | .components = | ||
| 380 | { | ||
| 381 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 382 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 383 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 384 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 385 | }, | ||
| 386 | .subresourceRange = | ||
| 387 | { | ||
| 388 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 389 | .baseMipLevel = 0, | ||
| 390 | .levelCount = 1, | ||
| 391 | .baseArrayLayer = 0, | ||
| 392 | .layerCount = 1, | ||
| 393 | }, | ||
| 394 | }); | ||
| 395 | } | ||
| 396 | } | ||
| 397 | |||
| 398 | void FSR::CreatePipelineLayout() { | ||
| 399 | VkPushConstantRange push_const{ | ||
| 400 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 401 | .offset = 0, | ||
| 402 | .size = sizeof(std::array<u32, 4 * 4>), | ||
| 403 | }; | ||
| 404 | VkPipelineLayoutCreateInfo ci{ | ||
| 405 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 406 | .pNext = nullptr, | ||
| 407 | .flags = 0, | ||
| 408 | .setLayoutCount = 1, | ||
| 409 | .pSetLayouts = descriptor_set_layout.address(), | ||
| 410 | .pushConstantRangeCount = 1, | ||
| 411 | .pPushConstantRanges = &push_const, | ||
| 412 | }; | ||
| 413 | |||
| 414 | pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); | ||
| 415 | } | ||
| 416 | |||
| 417 | void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { | ||
| 418 | const auto fsr_image_view = *image_views[image_index]; | ||
| 419 | const auto blit_image_view = *image_views[image_count + image_index]; | ||
| 420 | |||
| 421 | const VkDescriptorImageInfo image_info{ | ||
| 422 | .sampler = VK_NULL_HANDLE, | ||
| 423 | .imageView = image_view, | ||
| 424 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 425 | }; | ||
| 426 | const VkDescriptorImageInfo fsr_image_info{ | ||
| 427 | .sampler = VK_NULL_HANDLE, | ||
| 428 | .imageView = fsr_image_view, | ||
| 429 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 430 | }; | ||
| 431 | const VkDescriptorImageInfo blit_image_info{ | ||
| 432 | .sampler = VK_NULL_HANDLE, | ||
| 433 | .imageView = blit_image_view, | ||
| 434 | .imageLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 435 | }; | ||
| 436 | |||
| 437 | VkWriteDescriptorSet sampler_write{ | ||
| 438 | .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||
| 439 | .pNext = nullptr, | ||
| 440 | .dstSet = descriptor_sets[image_index * 2], | ||
| 441 | .dstBinding = 0, | ||
| 442 | .dstArrayElement = 0, | ||
| 443 | .descriptorCount = 1, | ||
| 444 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 445 | .pImageInfo = &image_info, | ||
| 446 | .pBufferInfo = nullptr, | ||
| 447 | .pTexelBufferView = nullptr, | ||
| 448 | }; | ||
| 449 | |||
| 450 | VkWriteDescriptorSet output_write{ | ||
| 451 | .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||
| 452 | .pNext = nullptr, | ||
| 453 | .dstSet = descriptor_sets[image_index * 2], | ||
| 454 | .dstBinding = 1, | ||
| 455 | .dstArrayElement = 0, | ||
| 456 | .descriptorCount = 1, | ||
| 457 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | ||
| 458 | .pImageInfo = &fsr_image_info, | ||
| 459 | .pBufferInfo = nullptr, | ||
| 460 | .pTexelBufferView = nullptr, | ||
| 461 | }; | ||
| 462 | |||
| 463 | device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {}); | ||
| 464 | |||
| 465 | sampler_write.dstSet = descriptor_sets[image_index * 2 + 1]; | ||
| 466 | sampler_write.pImageInfo = &fsr_image_info; | ||
| 467 | output_write.dstSet = descriptor_sets[image_index * 2 + 1]; | ||
| 468 | output_write.pImageInfo = &blit_image_info; | ||
| 469 | |||
| 470 | device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {}); | ||
| 471 | } | ||
| 472 | |||
| 473 | void FSR::CreateSampler() { | ||
| 474 | const VkSamplerCreateInfo ci{ | ||
| 475 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, | ||
| 476 | .pNext = nullptr, | ||
| 477 | .flags = 0, | ||
| 478 | .magFilter = VK_FILTER_LINEAR, | ||
| 479 | .minFilter = VK_FILTER_LINEAR, | ||
| 480 | .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, | ||
| 481 | .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, | ||
| 482 | .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, | ||
| 483 | .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, | ||
| 484 | .mipLodBias = 0.0f, | ||
| 485 | .anisotropyEnable = VK_FALSE, | ||
| 486 | .maxAnisotropy = 0.0f, | ||
| 487 | .compareEnable = VK_FALSE, | ||
| 488 | .compareOp = VK_COMPARE_OP_NEVER, | ||
| 489 | .minLod = 0.0f, | ||
| 490 | .maxLod = 0.0f, | ||
| 491 | .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, | ||
| 492 | .unnormalizedCoordinates = VK_FALSE, | ||
| 493 | }; | ||
| 494 | |||
| 495 | sampler = device.GetLogical().CreateSampler(ci); | ||
| 496 | } | ||
| 497 | |||
| 498 | void FSR::CreateShaders() { | ||
| 499 | if (device.IsFloat16Supported()) { | ||
| 500 | easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV); | ||
| 501 | rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV); | ||
| 502 | } else { | ||
| 503 | easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV); | ||
| 504 | rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV); | ||
| 505 | } | ||
| 506 | } | ||
| 507 | |||
| 508 | void FSR::CreatePipeline() { | ||
| 509 | VkPipelineShaderStageCreateInfo shader_stage_easu{ | ||
| 510 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 511 | .pNext = nullptr, | ||
| 512 | .flags = 0, | ||
| 513 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 514 | .module = *easu_shader, | ||
| 515 | .pName = "main", | ||
| 516 | .pSpecializationInfo = nullptr, | ||
| 517 | }; | ||
| 518 | |||
| 519 | VkPipelineShaderStageCreateInfo shader_stage_rcas{ | ||
| 520 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 521 | .pNext = nullptr, | ||
| 522 | .flags = 0, | ||
| 523 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 524 | .module = *rcas_shader, | ||
| 525 | .pName = "main", | ||
| 526 | .pSpecializationInfo = nullptr, | ||
| 527 | }; | ||
| 528 | |||
| 529 | VkComputePipelineCreateInfo pipeline_ci_easu{ | ||
| 530 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | ||
| 531 | .pNext = nullptr, | ||
| 532 | .flags = 0, | ||
| 533 | .stage = shader_stage_easu, | ||
| 534 | .layout = *pipeline_layout, | ||
| 535 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 536 | .basePipelineIndex = 0, | ||
| 537 | }; | ||
| 538 | |||
| 539 | VkComputePipelineCreateInfo pipeline_ci_rcas{ | ||
| 540 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | ||
| 541 | .pNext = nullptr, | ||
| 542 | .flags = 0, | ||
| 543 | .stage = shader_stage_rcas, | ||
| 544 | .layout = *pipeline_layout, | ||
| 545 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 546 | .basePipelineIndex = 0, | ||
| 547 | }; | ||
| 548 | |||
| 549 | easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_easu); | ||
| 550 | rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_rcas); | ||
| 551 | } | ||
| 552 | |||
| 553 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h new file mode 100644 index 000000000..6bbec3d36 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_fsr.h | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/math_util.h" | ||
| 8 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 9 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 10 | |||
| 11 | namespace Vulkan { | ||
| 12 | |||
| 13 | class Device; | ||
| 14 | class VKScheduler; | ||
| 15 | |||
| 16 | class FSR { | ||
| 17 | public: | ||
| 18 | explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, | ||
| 19 | VkExtent2D output_size); | ||
| 20 | VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, | ||
| 21 | VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect); | ||
| 22 | |||
| 23 | private: | ||
| 24 | void CreateDescriptorPool(); | ||
| 25 | void CreateDescriptorSetLayout(); | ||
| 26 | void CreateDescriptorSets(); | ||
| 27 | void CreateImages(); | ||
| 28 | void CreateSampler(); | ||
| 29 | void CreateShaders(); | ||
| 30 | void CreatePipeline(); | ||
| 31 | void CreatePipelineLayout(); | ||
| 32 | |||
| 33 | void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; | ||
| 34 | |||
| 35 | const Device& device; | ||
| 36 | MemoryAllocator& memory_allocator; | ||
| 37 | size_t image_count; | ||
| 38 | VkExtent2D output_size; | ||
| 39 | |||
| 40 | vk::DescriptorPool descriptor_pool; | ||
| 41 | vk::DescriptorSetLayout descriptor_set_layout; | ||
| 42 | vk::DescriptorSets descriptor_sets; | ||
| 43 | vk::PipelineLayout pipeline_layout; | ||
| 44 | vk::ShaderModule easu_shader; | ||
| 45 | vk::ShaderModule rcas_shader; | ||
| 46 | vk::Pipeline easu_pipeline; | ||
| 47 | vk::Pipeline rcas_pipeline; | ||
| 48 | vk::Sampler sampler; | ||
| 49 | std::vector<vk::Image> images; | ||
| 50 | std::vector<vk::ImageView> image_views; | ||
| 51 | std::vector<MemoryCommit> buffer_commits; | ||
| 52 | }; | ||
| 53 | |||
| 54 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 8634c3316..616a7b457 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -32,6 +32,8 @@ namespace { | |||
| 32 | using boost::container::small_vector; | 32 | using boost::container::small_vector; |
| 33 | using boost::container::static_vector; | 33 | using boost::container::static_vector; |
| 34 | using Shader::ImageBufferDescriptor; | 34 | using Shader::ImageBufferDescriptor; |
| 35 | using Shader::Backend::SPIRV::RESCALING_LAYOUT_DOWN_FACTOR_OFFSET; | ||
| 36 | using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; | ||
| 35 | using Tegra::Texture::TexturePair; | 37 | using Tegra::Texture::TexturePair; |
| 36 | using VideoCore::Surface::PixelFormat; | 38 | using VideoCore::Surface::PixelFormat; |
| 37 | using VideoCore::Surface::PixelFormatFromDepthFormat; | 39 | using VideoCore::Surface::PixelFormatFromDepthFormat; |
| @@ -235,6 +237,7 @@ GraphicsPipeline::GraphicsPipeline( | |||
| 235 | stage_infos[stage] = *info; | 237 | stage_infos[stage] = *info; |
| 236 | enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; | 238 | enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; |
| 237 | std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); | 239 | std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); |
| 240 | num_textures += Shader::NumDescriptors(info->texture_descriptors); | ||
| 238 | } | 241 | } |
| 239 | auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { | 242 | auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { |
| 240 | DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; | 243 | DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; |
| @@ -277,11 +280,10 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { | |||
| 277 | 280 | ||
| 278 | template <typename Spec> | 281 | template <typename Spec> |
| 279 | void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | 282 | void GraphicsPipeline::ConfigureImpl(bool is_indexed) { |
| 280 | std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids; | 283 | std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views; |
| 281 | std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices; | ||
| 282 | std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; | 284 | std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; |
| 283 | size_t sampler_index{}; | 285 | size_t sampler_index{}; |
| 284 | size_t image_index{}; | 286 | size_t view_index{}; |
| 285 | 287 | ||
| 286 | texture_cache.SynchronizeGraphicsDescriptors(); | 288 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 287 | 289 | ||
| @@ -322,26 +324,30 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 322 | } | 324 | } |
| 323 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | 325 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); |
| 324 | }}; | 326 | }}; |
| 325 | const auto add_image{[&](const auto& desc) { | 327 | const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { |
| 326 | for (u32 index = 0; index < desc.count; ++index) { | 328 | for (u32 index = 0; index < desc.count; ++index) { |
| 327 | const auto handle{read_handle(desc, index)}; | 329 | const auto handle{read_handle(desc, index)}; |
| 328 | image_view_indices[image_index++] = handle.first; | 330 | views[view_index++] = { |
| 331 | .index = handle.first, | ||
| 332 | .blacklist = blacklist, | ||
| 333 | .id = {}, | ||
| 334 | }; | ||
| 329 | } | 335 | } |
| 330 | }}; | 336 | }}; |
| 331 | if constexpr (Spec::has_texture_buffers) { | 337 | if constexpr (Spec::has_texture_buffers) { |
| 332 | for (const auto& desc : info.texture_buffer_descriptors) { | 338 | for (const auto& desc : info.texture_buffer_descriptors) { |
| 333 | add_image(desc); | 339 | add_image(desc, false); |
| 334 | } | 340 | } |
| 335 | } | 341 | } |
| 336 | if constexpr (Spec::has_image_buffers) { | 342 | if constexpr (Spec::has_image_buffers) { |
| 337 | for (const auto& desc : info.image_buffer_descriptors) { | 343 | for (const auto& desc : info.image_buffer_descriptors) { |
| 338 | add_image(desc); | 344 | add_image(desc, false); |
| 339 | } | 345 | } |
| 340 | } | 346 | } |
| 341 | for (const auto& desc : info.texture_descriptors) { | 347 | for (const auto& desc : info.texture_descriptors) { |
| 342 | for (u32 index = 0; index < desc.count; ++index) { | 348 | for (u32 index = 0; index < desc.count; ++index) { |
| 343 | const auto handle{read_handle(desc, index)}; | 349 | const auto handle{read_handle(desc, index)}; |
| 344 | image_view_indices[image_index++] = handle.first; | 350 | views[view_index++] = {handle.first}; |
| 345 | 351 | ||
| 346 | Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; | 352 | Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; |
| 347 | samplers[sampler_index++] = sampler->Handle(); | 353 | samplers[sampler_index++] = sampler->Handle(); |
| @@ -349,7 +355,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 349 | } | 355 | } |
| 350 | if constexpr (Spec::has_images) { | 356 | if constexpr (Spec::has_images) { |
| 351 | for (const auto& desc : info.image_descriptors) { | 357 | for (const auto& desc : info.image_descriptors) { |
| 352 | add_image(desc); | 358 | add_image(desc, desc.is_written); |
| 353 | } | 359 | } |
| 354 | } | 360 | } |
| 355 | }}; | 361 | }}; |
| @@ -368,10 +374,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 368 | if constexpr (Spec::enabled_stages[4]) { | 374 | if constexpr (Spec::enabled_stages[4]) { |
| 369 | config_stage(4); | 375 | config_stage(4); |
| 370 | } | 376 | } |
| 371 | const std::span indices_span(image_view_indices.data(), image_index); | 377 | texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), view_index)); |
| 372 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 373 | 378 | ||
| 374 | ImageId* texture_buffer_index{image_view_ids.data()}; | 379 | VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; |
| 375 | const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { | 380 | const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { |
| 376 | size_t index{}; | 381 | size_t index{}; |
| 377 | const auto add_buffer{[&](const auto& desc) { | 382 | const auto add_buffer{[&](const auto& desc) { |
| @@ -381,12 +386,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 381 | if constexpr (is_image) { | 386 | if constexpr (is_image) { |
| 382 | is_written = desc.is_written; | 387 | is_written = desc.is_written; |
| 383 | } | 388 | } |
| 384 | ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; | 389 | ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)}; |
| 385 | buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), | 390 | buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), |
| 386 | image_view.BufferSize(), image_view.format, | 391 | image_view.BufferSize(), image_view.format, |
| 387 | is_written, is_image); | 392 | is_written, is_image); |
| 388 | ++index; | 393 | ++index; |
| 389 | ++texture_buffer_index; | 394 | ++texture_buffer_it; |
| 390 | } | 395 | } |
| 391 | }}; | 396 | }}; |
| 392 | buffer_cache.UnbindGraphicsTextureBuffers(stage); | 397 | buffer_cache.UnbindGraphicsTextureBuffers(stage); |
| @@ -402,13 +407,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 402 | add_buffer(desc); | 407 | add_buffer(desc); |
| 403 | } | 408 | } |
| 404 | } | 409 | } |
| 405 | for (const auto& desc : info.texture_descriptors) { | 410 | texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors); |
| 406 | texture_buffer_index += desc.count; | ||
| 407 | } | ||
| 408 | if constexpr (Spec::has_images) { | 411 | if constexpr (Spec::has_images) { |
| 409 | for (const auto& desc : info.image_descriptors) { | 412 | texture_buffer_it += Shader::NumDescriptors(info.image_descriptors); |
| 410 | texture_buffer_index += desc.count; | ||
| 411 | } | ||
| 412 | } | 413 | } |
| 413 | }}; | 414 | }}; |
| 414 | if constexpr (Spec::enabled_stages[0]) { | 415 | if constexpr (Spec::enabled_stages[0]) { |
| @@ -432,12 +433,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 432 | 433 | ||
| 433 | update_descriptor_queue.Acquire(); | 434 | update_descriptor_queue.Acquire(); |
| 434 | 435 | ||
| 436 | RescalingPushConstant rescaling; | ||
| 435 | const VkSampler* samplers_it{samplers.data()}; | 437 | const VkSampler* samplers_it{samplers.data()}; |
| 436 | const ImageId* views_it{image_view_ids.data()}; | 438 | const VideoCommon::ImageViewInOut* views_it{views.data()}; |
| 437 | const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { | 439 | const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { |
| 438 | buffer_cache.BindHostStageBuffers(stage); | 440 | buffer_cache.BindHostStageBuffers(stage); |
| 439 | PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, | 441 | PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling, |
| 440 | update_descriptor_queue); | 442 | samplers_it, views_it); |
| 441 | }}; | 443 | }}; |
| 442 | if constexpr (Spec::enabled_stages[0]) { | 444 | if constexpr (Spec::enabled_stages[0]) { |
| 443 | prepare_stage(0); | 445 | prepare_stage(0); |
| @@ -454,10 +456,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 454 | if constexpr (Spec::enabled_stages[4]) { | 456 | if constexpr (Spec::enabled_stages[4]) { |
| 455 | prepare_stage(4); | 457 | prepare_stage(4); |
| 456 | } | 458 | } |
| 457 | ConfigureDraw(); | 459 | ConfigureDraw(rescaling); |
| 458 | } | 460 | } |
| 459 | 461 | ||
| 460 | void GraphicsPipeline::ConfigureDraw() { | 462 | void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { |
| 461 | texture_cache.UpdateRenderTargets(false); | 463 | texture_cache.UpdateRenderTargets(false); |
| 462 | scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); | 464 | scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); |
| 463 | 465 | ||
| @@ -468,12 +470,25 @@ void GraphicsPipeline::ConfigureDraw() { | |||
| 468 | build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); | 470 | build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); |
| 469 | }); | 471 | }); |
| 470 | } | 472 | } |
| 473 | const bool is_rescaling{texture_cache.IsRescaling()}; | ||
| 474 | const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)}; | ||
| 471 | const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; | 475 | const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; |
| 472 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | 476 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; |
| 473 | scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { | 477 | scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), |
| 478 | is_rescaling, update_rescaling](vk::CommandBuffer cmdbuf) { | ||
| 474 | if (bind_pipeline) { | 479 | if (bind_pipeline) { |
| 475 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); | 480 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); |
| 476 | } | 481 | } |
| 482 | cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, | ||
| 483 | RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), | ||
| 484 | rescaling_data.data()); | ||
| 485 | if (update_rescaling) { | ||
| 486 | const f32 config_down_factor{Settings::values.resolution_info.down_factor}; | ||
| 487 | const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f}; | ||
| 488 | cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, | ||
| 489 | RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor), | ||
| 490 | &scale_down_factor); | ||
| 491 | } | ||
| 477 | if (!descriptor_set_layout) { | 492 | if (!descriptor_set_layout) { |
| 478 | return; | 493 | return; |
| 479 | } | 494 | } |
| @@ -826,18 +841,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { | |||
| 826 | void GraphicsPipeline::Validate() { | 841 | void GraphicsPipeline::Validate() { |
| 827 | size_t num_images{}; | 842 | size_t num_images{}; |
| 828 | for (const auto& info : stage_infos) { | 843 | for (const auto& info : stage_infos) { |
| 829 | for (const auto& desc : info.texture_buffer_descriptors) { | 844 | num_images += Shader::NumDescriptors(info.texture_buffer_descriptors); |
| 830 | num_images += desc.count; | 845 | num_images += Shader::NumDescriptors(info.image_buffer_descriptors); |
| 831 | } | 846 | num_images += Shader::NumDescriptors(info.texture_descriptors); |
| 832 | for (const auto& desc : info.image_buffer_descriptors) { | 847 | num_images += Shader::NumDescriptors(info.image_descriptors); |
| 833 | num_images += desc.count; | ||
| 834 | } | ||
| 835 | for (const auto& desc : info.texture_descriptors) { | ||
| 836 | num_images += desc.count; | ||
| 837 | } | ||
| 838 | for (const auto& desc : info.image_descriptors) { | ||
| 839 | num_images += desc.count; | ||
| 840 | } | ||
| 841 | } | 848 | } |
| 842 | ASSERT(num_images <= MAX_IMAGE_ELEMENTS); | 849 | ASSERT(num_images <= MAX_IMAGE_ELEMENTS); |
| 843 | } | 850 | } |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 1c780e944..a0c1d8f07 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h | |||
| @@ -62,6 +62,7 @@ namespace Vulkan { | |||
| 62 | class Device; | 62 | class Device; |
| 63 | class PipelineStatistics; | 63 | class PipelineStatistics; |
| 64 | class RenderPassCache; | 64 | class RenderPassCache; |
| 65 | class RescalingPushConstant; | ||
| 65 | class VKScheduler; | 66 | class VKScheduler; |
| 66 | class VKUpdateDescriptorQueue; | 67 | class VKUpdateDescriptorQueue; |
| 67 | 68 | ||
| @@ -113,7 +114,7 @@ private: | |||
| 113 | template <typename Spec> | 114 | template <typename Spec> |
| 114 | void ConfigureImpl(bool is_indexed); | 115 | void ConfigureImpl(bool is_indexed); |
| 115 | 116 | ||
| 116 | void ConfigureDraw(); | 117 | void ConfigureDraw(const RescalingPushConstant& rescaling); |
| 117 | 118 | ||
| 118 | void MakePipeline(VkRenderPass render_pass); | 119 | void MakePipeline(VkRenderPass render_pass); |
| 119 | 120 | ||
| @@ -138,6 +139,7 @@ private: | |||
| 138 | std::array<Shader::Info, NUM_STAGES> stage_infos; | 139 | std::array<Shader::Info, NUM_STAGES> stage_infos; |
| 139 | std::array<u32, 5> enabled_uniform_buffer_masks{}; | 140 | std::array<u32, 5> enabled_uniform_buffer_masks{}; |
| 140 | VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; | 141 | VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; |
| 142 | u32 num_textures{}; | ||
| 141 | 143 | ||
| 142 | vk::DescriptorSetLayout descriptor_set_layout; | 144 | vk::DescriptorSetLayout descriptor_set_layout; |
| 143 | DescriptorAllocator descriptor_allocator; | 145 | DescriptorAllocator descriptor_allocator; |
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 0886b7da8..9be9c9bed 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h | |||
| @@ -70,7 +70,9 @@ public: | |||
| 70 | return; | 70 | return; |
| 71 | } | 71 | } |
| 72 | // If none of the above is hit, fallback to a regular wait | 72 | // If none of the above is hit, fallback to a regular wait |
| 73 | semaphore.Wait(tick); | 73 | while (!semaphore.Wait(tick)) { |
| 74 | } | ||
| 75 | Refresh(); | ||
| 74 | } | 76 | } |
| 75 | 77 | ||
| 76 | private: | 78 | private: |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 30b47a7a0..fd334a146 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -58,18 +58,28 @@ struct DrawParams { | |||
| 58 | bool is_indexed; | 58 | bool is_indexed; |
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { | 61 | VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) { |
| 62 | const auto& src = regs.viewport_transform[index]; | 62 | const auto& src = regs.viewport_transform[index]; |
| 63 | const float width = src.scale_x * 2.0f; | 63 | const auto conv = [scale](float value) { |
| 64 | float y = src.translate_y - src.scale_y; | 64 | float new_value = value * scale; |
| 65 | float height = src.scale_y * 2.0f; | 65 | if (scale < 1.0f) { |
| 66 | const bool sign = std::signbit(value); | ||
| 67 | new_value = std::round(std::abs(new_value)); | ||
| 68 | new_value = sign ? -new_value : new_value; | ||
| 69 | } | ||
| 70 | return new_value; | ||
| 71 | }; | ||
| 72 | const float x = conv(src.translate_x - src.scale_x); | ||
| 73 | const float width = conv(src.scale_x * 2.0f); | ||
| 74 | float y = conv(src.translate_y - src.scale_y); | ||
| 75 | float height = conv(src.scale_y * 2.0f); | ||
| 66 | if (regs.screen_y_control.y_negate) { | 76 | if (regs.screen_y_control.y_negate) { |
| 67 | y += height; | 77 | y += height; |
| 68 | height = -height; | 78 | height = -height; |
| 69 | } | 79 | } |
| 70 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; | 80 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; |
| 71 | VkViewport viewport{ | 81 | VkViewport viewport{ |
| 72 | .x = src.translate_x - src.scale_x, | 82 | .x = x, |
| 73 | .y = y, | 83 | .y = y, |
| 74 | .width = width != 0.0f ? width : 1.0f, | 84 | .width = width != 0.0f ? width : 1.0f, |
| 75 | .height = height != 0.0f ? height : 1.0f, | 85 | .height = height != 0.0f ? height : 1.0f, |
| @@ -83,14 +93,27 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in | |||
| 83 | return viewport; | 93 | return viewport; |
| 84 | } | 94 | } |
| 85 | 95 | ||
| 86 | VkRect2D GetScissorState(const Maxwell& regs, size_t index) { | 96 | VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u32 down_shift = 0) { |
| 87 | const auto& src = regs.scissor_test[index]; | 97 | const auto& src = regs.scissor_test[index]; |
| 88 | VkRect2D scissor; | 98 | VkRect2D scissor; |
| 99 | const auto scale_up = [&](s32 value) -> s32 { | ||
| 100 | if (value == 0) { | ||
| 101 | return 0U; | ||
| 102 | } | ||
| 103 | const s32 upset = value * up_scale; | ||
| 104 | s32 acumm = 0; | ||
| 105 | if ((up_scale >> down_shift) == 0) { | ||
| 106 | acumm = upset % 2; | ||
| 107 | } | ||
| 108 | const s32 converted_value = (value * up_scale) >> down_shift; | ||
| 109 | return value < 0 ? std::min<s32>(converted_value - acumm, -1) | ||
| 110 | : std::max<s32>(converted_value + acumm, 1); | ||
| 111 | }; | ||
| 89 | if (src.enable) { | 112 | if (src.enable) { |
| 90 | scissor.offset.x = static_cast<s32>(src.min_x); | 113 | scissor.offset.x = scale_up(static_cast<s32>(src.min_x)); |
| 91 | scissor.offset.y = static_cast<s32>(src.min_y); | 114 | scissor.offset.y = scale_up(static_cast<s32>(src.min_y)); |
| 92 | scissor.extent.width = src.max_x - src.min_x; | 115 | scissor.extent.width = scale_up(src.max_x - src.min_x); |
| 93 | scissor.extent.height = src.max_y - src.min_y; | 116 | scissor.extent.height = scale_up(src.max_y - src.min_y); |
| 94 | } else { | 117 | } else { |
| 95 | scissor.offset.x = 0; | 118 | scissor.offset.x = 0; |
| 96 | scissor.offset.y = 0; | 119 | scissor.offset.y = 0; |
| @@ -199,7 +222,7 @@ void RasterizerVulkan::Clear() { | |||
| 199 | 222 | ||
| 200 | query_cache.UpdateCounters(); | 223 | query_cache.UpdateCounters(); |
| 201 | 224 | ||
| 202 | const auto& regs = maxwell3d.regs; | 225 | auto& regs = maxwell3d.regs; |
| 203 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 226 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 204 | regs.clear_buffers.A; | 227 | regs.clear_buffers.A; |
| 205 | const bool use_depth = regs.clear_buffers.Z; | 228 | const bool use_depth = regs.clear_buffers.Z; |
| @@ -214,8 +237,16 @@ void RasterizerVulkan::Clear() { | |||
| 214 | const VkExtent2D render_area = framebuffer->RenderArea(); | 237 | const VkExtent2D render_area = framebuffer->RenderArea(); |
| 215 | scheduler.RequestRenderpass(framebuffer); | 238 | scheduler.RequestRenderpass(framebuffer); |
| 216 | 239 | ||
| 240 | u32 up_scale = 1; | ||
| 241 | u32 down_shift = 0; | ||
| 242 | if (texture_cache.IsRescaling()) { | ||
| 243 | up_scale = Settings::values.resolution_info.up_scale; | ||
| 244 | down_shift = Settings::values.resolution_info.down_shift; | ||
| 245 | } | ||
| 246 | UpdateViewportsState(regs); | ||
| 247 | |||
| 217 | VkClearRect clear_rect{ | 248 | VkClearRect clear_rect{ |
| 218 | .rect = GetScissorState(regs, 0), | 249 | .rect = GetScissorState(regs, 0, up_scale, down_shift), |
| 219 | .baseArrayLayer = regs.clear_buffers.layer, | 250 | .baseArrayLayer = regs.clear_buffers.layer, |
| 220 | .layerCount = 1, | 251 | .layerCount = 1, |
| 221 | }; | 252 | }; |
| @@ -230,7 +261,38 @@ void RasterizerVulkan::Clear() { | |||
| 230 | const u32 color_attachment = regs.clear_buffers.RT; | 261 | const u32 color_attachment = regs.clear_buffers.RT; |
| 231 | if (use_color && framebuffer->HasAspectColorBit(color_attachment)) { | 262 | if (use_color && framebuffer->HasAspectColorBit(color_attachment)) { |
| 232 | VkClearValue clear_value; | 263 | VkClearValue clear_value; |
| 233 | std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); | 264 | bool is_integer = false; |
| 265 | bool is_signed = false; | ||
| 266 | size_t int_size = 8; | ||
| 267 | for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++i) { | ||
| 268 | const auto& this_rt = regs.rt[i]; | ||
| 269 | if (this_rt.Address() == 0) { | ||
| 270 | continue; | ||
| 271 | } | ||
| 272 | if (this_rt.format == Tegra::RenderTargetFormat::NONE) { | ||
| 273 | continue; | ||
| 274 | } | ||
| 275 | const auto format = | ||
| 276 | VideoCore::Surface::PixelFormatFromRenderTargetFormat(this_rt.format); | ||
| 277 | is_integer = IsPixelFormatInteger(format); | ||
| 278 | is_signed = IsPixelFormatSignedInteger(format); | ||
| 279 | int_size = PixelComponentSizeBitsInteger(format); | ||
| 280 | break; | ||
| 281 | } | ||
| 282 | if (!is_integer) { | ||
| 283 | std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); | ||
| 284 | } else if (!is_signed) { | ||
| 285 | for (size_t i = 0; i < 4; i++) { | ||
| 286 | clear_value.color.uint32[i] = static_cast<u32>( | ||
| 287 | static_cast<f32>(static_cast<u64>(int_size) << 1U) * regs.clear_color[i]); | ||
| 288 | } | ||
| 289 | } else { | ||
| 290 | for (size_t i = 0; i < 4; i++) { | ||
| 291 | clear_value.color.int32[i] = | ||
| 292 | static_cast<s32>(static_cast<f32>(static_cast<s64>(int_size - 1) << 1) * | ||
| 293 | (regs.clear_color[i] - 0.5f)); | ||
| 294 | } | ||
| 295 | } | ||
| 234 | 296 | ||
| 235 | scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { | 297 | scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { |
| 236 | const VkClearAttachment attachment{ | 298 | const VkClearAttachment attachment{ |
| @@ -595,15 +657,17 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg | |||
| 595 | if (!state_tracker.TouchViewports()) { | 657 | if (!state_tracker.TouchViewports()) { |
| 596 | return; | 658 | return; |
| 597 | } | 659 | } |
| 660 | const bool is_rescaling{texture_cache.IsRescaling()}; | ||
| 661 | const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; | ||
| 598 | const std::array viewports{ | 662 | const std::array viewports{ |
| 599 | GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), | 663 | GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale), |
| 600 | GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), | 664 | GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale), |
| 601 | GetViewportState(device, regs, 4), GetViewportState(device, regs, 5), | 665 | GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale), |
| 602 | GetViewportState(device, regs, 6), GetViewportState(device, regs, 7), | 666 | GetViewportState(device, regs, 6, scale), GetViewportState(device, regs, 7, scale), |
| 603 | GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), | 667 | GetViewportState(device, regs, 8, scale), GetViewportState(device, regs, 9, scale), |
| 604 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), | 668 | GetViewportState(device, regs, 10, scale), GetViewportState(device, regs, 11, scale), |
| 605 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), | 669 | GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale), |
| 606 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15), | 670 | GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale), |
| 607 | }; | 671 | }; |
| 608 | scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); | 672 | scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); |
| 609 | } | 673 | } |
| @@ -612,13 +676,29 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs | |||
| 612 | if (!state_tracker.TouchScissors()) { | 676 | if (!state_tracker.TouchScissors()) { |
| 613 | return; | 677 | return; |
| 614 | } | 678 | } |
| 679 | u32 up_scale = 1; | ||
| 680 | u32 down_shift = 0; | ||
| 681 | if (texture_cache.IsRescaling()) { | ||
| 682 | up_scale = Settings::values.resolution_info.up_scale; | ||
| 683 | down_shift = Settings::values.resolution_info.down_shift; | ||
| 684 | } | ||
| 615 | const std::array scissors{ | 685 | const std::array scissors{ |
| 616 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), | 686 | GetScissorState(regs, 0, up_scale, down_shift), |
| 617 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), | 687 | GetScissorState(regs, 1, up_scale, down_shift), |
| 618 | GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), | 688 | GetScissorState(regs, 2, up_scale, down_shift), |
| 619 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), | 689 | GetScissorState(regs, 3, up_scale, down_shift), |
| 620 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), | 690 | GetScissorState(regs, 4, up_scale, down_shift), |
| 621 | GetScissorState(regs, 15), | 691 | GetScissorState(regs, 5, up_scale, down_shift), |
| 692 | GetScissorState(regs, 6, up_scale, down_shift), | ||
| 693 | GetScissorState(regs, 7, up_scale, down_shift), | ||
| 694 | GetScissorState(regs, 8, up_scale, down_shift), | ||
| 695 | GetScissorState(regs, 9, up_scale, down_shift), | ||
| 696 | GetScissorState(regs, 10, up_scale, down_shift), | ||
| 697 | GetScissorState(regs, 11, up_scale, down_shift), | ||
| 698 | GetScissorState(regs, 12, up_scale, down_shift), | ||
| 699 | GetScissorState(regs, 13, up_scale, down_shift), | ||
| 700 | GetScissorState(regs, 14, up_scale, down_shift), | ||
| 701 | GetScissorState(regs, 15, up_scale, down_shift), | ||
| 622 | }; | 702 | }; |
| 623 | scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); | 703 | scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); |
| 624 | } | 704 | } |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 0c11c814f..3bfdf41ba 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -128,6 +128,15 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { | |||
| 128 | return true; | 128 | return true; |
| 129 | } | 129 | } |
| 130 | 130 | ||
| 131 | bool VKScheduler::UpdateRescaling(bool is_rescaling) { | ||
| 132 | if (state.rescaling_defined && is_rescaling == state.is_rescaling) { | ||
| 133 | return false; | ||
| 134 | } | ||
| 135 | state.rescaling_defined = true; | ||
| 136 | state.is_rescaling = is_rescaling; | ||
| 137 | return true; | ||
| 138 | } | ||
| 139 | |||
| 131 | void VKScheduler::WorkerThread(std::stop_token stop_token) { | 140 | void VKScheduler::WorkerThread(std::stop_token stop_token) { |
| 132 | Common::SetCurrentThreadName("yuzu:VulkanWorker"); | 141 | Common::SetCurrentThreadName("yuzu:VulkanWorker"); |
| 133 | do { | 142 | do { |
| @@ -227,6 +236,7 @@ void VKScheduler::AllocateNewContext() { | |||
| 227 | 236 | ||
/// Drops the cached graphics pipeline pointer, marks the tracked rescaling mode as undefined
/// (so the next UpdateRescaling call reports a change), and asks the state tracker to
/// invalidate its command buffer state.
| 228 | void VKScheduler::InvalidateState() { | 237 | void VKScheduler::InvalidateState() { |
| 229 | state.graphics_pipeline = nullptr; | 238 | state.graphics_pipeline = nullptr; |
| 239 | state.rescaling_defined = false; | ||
| 230 | state_tracker.InvalidateCommandBufferState(); | 240 | state_tracker.InvalidateCommandBufferState(); |
| 231 | } | 241 | } |
| 232 | 242 | ||
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 85fc1712f..1b06c9296 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -56,6 +56,9 @@ public: | |||
| 56 | /// Update the pipeline to the current execution context. | 56 | /// Update the pipeline to the current execution context. |
| 57 | bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); | 57 | bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); |
| 58 | 58 | ||
| 59 | /// Update the rescaling state. Returns true if the state has to be updated. | ||
| 60 | bool UpdateRescaling(bool is_rescaling); | ||
| 61 | |||
| 59 | /// Invalidates current command buffer state except for render passes | 62 | /// Invalidates current command buffer state except for render passes |
| 60 | void InvalidateState(); | 63 | void InvalidateState(); |
| 61 | 64 | ||
| @@ -185,6 +188,8 @@ private: | |||
| 185 | VkFramebuffer framebuffer = nullptr; | 188 | VkFramebuffer framebuffer = nullptr; |
| 186 | VkExtent2D render_area = {0, 0}; | 189 | VkExtent2D render_area = {0, 0}; |
| 187 | GraphicsPipeline* graphics_pipeline = nullptr; | 190 | GraphicsPipeline* graphics_pipeline = nullptr; |
| 191 | bool is_rescaling = false; | ||
| 192 | bool rescaling_defined = false; | ||
| 188 | }; | 193 | }; |
| 189 | 194 | ||
| 190 | void WorkerThread(std::stop_token stop_token); | 195 | void WorkerThread(std::stop_token stop_token); |
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 2f2d6b31f..40a149832 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h | |||
| @@ -71,11 +71,15 @@ public: | |||
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | bool TouchViewports() { | 73 | bool TouchViewports() { |
| 74 | return Exchange(Dirty::Viewports, false); | 74 | const bool dirty_viewports = Exchange(Dirty::Viewports, false); |
| 75 | const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false); | ||
| 76 | return dirty_viewports || rescale_viewports; | ||
| 75 | } | 77 | } |
| 76 | 78 | ||
| 77 | bool TouchScissors() { | 79 | bool TouchScissors() { |
| 78 | return Exchange(Dirty::Scissors, false); | 80 | const bool dirty_scissors = Exchange(Dirty::Scissors, false); |
| 81 | const bool rescale_scissors = Exchange(VideoCommon::Dirty::RescaleScissors, false); | ||
| 82 | return dirty_scissors || rescale_scissors; | ||
| 79 | } | 83 | } |
| 80 | 84 | ||
| 81 | bool TouchDepthBias() { | 85 | bool TouchDepthBias() { |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 06c5fb867..407fd2a15 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -32,10 +32,12 @@ using Tegra::Engines::Fermi2D; | |||
| 32 | using Tegra::Texture::SwizzleSource; | 32 | using Tegra::Texture::SwizzleSource; |
| 33 | using Tegra::Texture::TextureMipmapFilter; | 33 | using Tegra::Texture::TextureMipmapFilter; |
| 34 | using VideoCommon::BufferImageCopy; | 34 | using VideoCommon::BufferImageCopy; |
| 35 | using VideoCommon::ImageFlagBits; | ||
| 35 | using VideoCommon::ImageInfo; | 36 | using VideoCommon::ImageInfo; |
| 36 | using VideoCommon::ImageType; | 37 | using VideoCommon::ImageType; |
| 37 | using VideoCommon::SubresourceRange; | 38 | using VideoCommon::SubresourceRange; |
| 38 | using VideoCore::Surface::IsPixelFormatASTC; | 39 | using VideoCore::Surface::IsPixelFormatASTC; |
| 40 | using VideoCore::Surface::IsPixelFormatInteger; | ||
| 39 | 41 | ||
| 40 | namespace { | 42 | namespace { |
| 41 | constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | 43 | constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { |
| @@ -588,8 +590,158 @@ struct RangedBarrierRange { | |||
| 588 | UNREACHABLE_MSG("Invalid image format={}", format); | 590 | UNREACHABLE_MSG("Invalid image format={}", format); |
| 589 | return VK_FORMAT_R32_UINT; | 591 | return VK_FORMAT_R32_UINT; |
| 590 | } | 592 | } |
| 593 | |||
| 594 | void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info, | ||
| 595 | VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution, | ||
| 596 | bool up_scaling = true) { | ||
| 597 | const bool is_2d = info.type == ImageType::e2D; | ||
| 598 | const auto resources = info.resources; | ||
| 599 | const VkExtent2D extent{ | ||
| 600 | .width = info.size.width, | ||
| 601 | .height = info.size.height, | ||
| 602 | }; | ||
| 603 | // Depth and integer formats must use NEAREST filter for blits. | ||
| 604 | const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT}; | ||
| 605 | const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)}; | ||
| 606 | const VkFilter vk_filter = is_bilinear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; | ||
| 607 | |||
| 608 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 609 | scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d, | ||
| 610 | vk_filter, up_scaling](vk::CommandBuffer cmdbuf) { | ||
| 611 | const VkOffset2D src_size{ | ||
| 612 | .x = static_cast<s32>(up_scaling ? extent.width : resolution.ScaleUp(extent.width)), | ||
| 613 | .y = static_cast<s32>(is_2d && up_scaling ? extent.height | ||
| 614 | : resolution.ScaleUp(extent.height)), | ||
| 615 | }; | ||
| 616 | const VkOffset2D dst_size{ | ||
| 617 | .x = static_cast<s32>(up_scaling ? resolution.ScaleUp(extent.width) : extent.width), | ||
| 618 | .y = static_cast<s32>(is_2d && up_scaling ? resolution.ScaleUp(extent.height) | ||
| 619 | : extent.height), | ||
| 620 | }; | ||
| 621 | boost::container::small_vector<VkImageBlit, 4> regions; | ||
| 622 | regions.reserve(resources.levels); | ||
| 623 | for (s32 level = 0; level < resources.levels; level++) { | ||
| 624 | regions.push_back({ | ||
| 625 | .srcSubresource{ | ||
| 626 | .aspectMask = aspect_mask, | ||
| 627 | .mipLevel = static_cast<u32>(level), | ||
| 628 | .baseArrayLayer = 0, | ||
| 629 | .layerCount = static_cast<u32>(resources.layers), | ||
| 630 | }, | ||
| 631 | .srcOffsets{ | ||
| 632 | { | ||
| 633 | .x = 0, | ||
| 634 | .y = 0, | ||
| 635 | .z = 0, | ||
| 636 | }, | ||
| 637 | { | ||
| 638 | .x = std::max(1, src_size.x >> level), | ||
| 639 | .y = std::max(1, src_size.y >> level), | ||
| 640 | .z = 1, | ||
| 641 | }, | ||
| 642 | }, | ||
| 643 | .dstSubresource{ | ||
| 644 | .aspectMask = aspect_mask, | ||
| 645 | .mipLevel = static_cast<u32>(level), | ||
| 646 | .baseArrayLayer = 0, | ||
| 647 | .layerCount = static_cast<u32>(resources.layers), | ||
| 648 | }, | ||
| 649 | .dstOffsets{ | ||
| 650 | { | ||
| 651 | .x = 0, | ||
| 652 | .y = 0, | ||
| 653 | .z = 0, | ||
| 654 | }, | ||
| 655 | { | ||
| 656 | .x = std::max(1, dst_size.x >> level), | ||
| 657 | .y = std::max(1, dst_size.y >> level), | ||
| 658 | .z = 1, | ||
| 659 | }, | ||
| 660 | }, | ||
| 661 | }); | ||
| 662 | } | ||
| 663 | const VkImageSubresourceRange subresource_range{ | ||
| 664 | .aspectMask = aspect_mask, | ||
| 665 | .baseMipLevel = 0, | ||
| 666 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 667 | .baseArrayLayer = 0, | ||
| 668 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 669 | }; | ||
| 670 | const std::array read_barriers{ | ||
| 671 | VkImageMemoryBarrier{ | ||
| 672 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 673 | .pNext = nullptr, | ||
| 674 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 675 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 676 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 677 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 678 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 679 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 680 | .image = src_image, | ||
| 681 | .subresourceRange = subresource_range, | ||
| 682 | }, | ||
| 683 | VkImageMemoryBarrier{ | ||
| 684 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 685 | .pNext = nullptr, | ||
| 686 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | | ||
| 687 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||
| 688 | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 689 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 690 | .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, // Discard contents | ||
| 691 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 692 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 693 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 694 | .image = dst_image, | ||
| 695 | .subresourceRange = subresource_range, | ||
| 696 | }, | ||
| 697 | }; | ||
| 698 | const std::array write_barriers{ | ||
| 699 | VkImageMemoryBarrier{ | ||
| 700 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 701 | .pNext = nullptr, | ||
| 702 | .srcAccessMask = 0, | ||
| 703 | .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT, | ||
| 704 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 705 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 706 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 707 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 708 | .image = src_image, | ||
| 709 | .subresourceRange = subresource_range, | ||
| 710 | }, | ||
| 711 | VkImageMemoryBarrier{ | ||
| 712 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 713 | .pNext = nullptr, | ||
| 714 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 715 | .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT, | ||
| 716 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 717 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 718 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 719 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 720 | .image = dst_image, | ||
| 721 | .subresourceRange = subresource_range, | ||
| 722 | }, | ||
| 723 | }; | ||
| 724 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 725 | 0, nullptr, nullptr, read_barriers); | ||
| 726 | cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, | ||
| 727 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, vk_filter); | ||
| 728 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 729 | 0, nullptr, nullptr, write_barriers); | ||
| 730 | }); | ||
| 731 | } | ||
| 591 | } // Anonymous namespace | 732 | } // Anonymous namespace |
| 592 | 733 | ||
/// Initializes every member from the constructor argument of the same name and initializes
/// `resolution` from Settings::values.resolution_info. The constructor body is empty.
| 734 | TextureCacheRuntime::TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_, | ||
| 735 | MemoryAllocator& memory_allocator_, | ||
| 736 | StagingBufferPool& staging_buffer_pool_, | ||
| 737 | BlitImageHelper& blit_image_helper_, | ||
| 738 | ASTCDecoderPass& astc_decoder_pass_, | ||
| 739 | RenderPassCache& render_pass_cache_) | ||
| 740 | : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_}, | ||
| 741 | staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_}, | ||
| 742 | astc_decoder_pass{astc_decoder_pass_}, render_pass_cache{render_pass_cache_}, | ||
| 743 | resolution{Settings::values.resolution_info} {} | ||
| 744 | |||
/// Forwards to the scheduler's Finish().
| 593 | void TextureCacheRuntime::Finish() { | 745 | void TextureCacheRuntime::Finish() { |
| 594 | scheduler.Finish(); | 746 | scheduler.Finish(); |
| 595 | } | 747 | } |
| @@ -614,8 +766,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst | |||
| 614 | return; | 766 | return; |
| 615 | } | 767 | } |
| 616 | if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { | 768 | if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { |
| 617 | blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter, | 769 | blit_image_helper.BlitColor(dst_framebuffer, src.Handle(Shader::TextureType::Color2D), |
| 618 | operation); | 770 | dst_region, src_region, filter, operation); |
| 619 | return; | 771 | return; |
| 620 | } | 772 | } |
| 621 | if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | 773 | if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { |
| @@ -719,26 +871,29 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst | |||
| 719 | }); | 871 | }); |
| 720 | } | 872 | } |
| 721 | 873 | ||
| 722 | void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { | 874 | void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, |
| 875 | bool rescaled) { | ||
| 876 | const u32 up_scale = rescaled ? resolution.up_scale : 1; | ||
| 877 | const u32 down_shift = rescaled ? resolution.down_shift : 0; | ||
| 723 | switch (dst_view.format) { | 878 | switch (dst_view.format) { |
| 724 | case PixelFormat::R16_UNORM: | 879 | case PixelFormat::R16_UNORM: |
| 725 | if (src_view.format == PixelFormat::D16_UNORM) { | 880 | if (src_view.format == PixelFormat::D16_UNORM) { |
| 726 | return blit_image_helper.ConvertD16ToR16(dst, src_view); | 881 | return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift); |
| 727 | } | 882 | } |
| 728 | break; | 883 | break; |
| 729 | case PixelFormat::R32_FLOAT: | 884 | case PixelFormat::R32_FLOAT: |
| 730 | if (src_view.format == PixelFormat::D32_FLOAT) { | 885 | if (src_view.format == PixelFormat::D32_FLOAT) { |
| 731 | return blit_image_helper.ConvertD32ToR32(dst, src_view); | 886 | return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); |
| 732 | } | 887 | } |
| 733 | break; | 888 | break; |
| 734 | case PixelFormat::D16_UNORM: | 889 | case PixelFormat::D16_UNORM: |
| 735 | if (src_view.format == PixelFormat::R16_UNORM) { | 890 | if (src_view.format == PixelFormat::R16_UNORM) { |
| 736 | return blit_image_helper.ConvertR16ToD16(dst, src_view); | 891 | return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift); |
| 737 | } | 892 | } |
| 738 | break; | 893 | break; |
| 739 | case PixelFormat::D32_FLOAT: | 894 | case PixelFormat::D32_FLOAT: |
| 740 | if (src_view.format == PixelFormat::R32_FLOAT) { | 895 | if (src_view.format == PixelFormat::R32_FLOAT) { |
| 741 | return blit_image_helper.ConvertR32ToD32(dst, src_view); | 896 | return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift); |
| 742 | } | 897 | } |
| 743 | break; | 898 | break; |
| 744 | default: | 899 | default: |
| @@ -840,36 +995,39 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { | |||
| 840 | return device.GetDeviceLocalMemory(); | 995 | return device.GetDeviceLocalMemory(); |
| 841 | } | 996 | } |
| 842 | 997 | ||
| 843 | Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, | 998 | void TextureCacheRuntime::TickFrame() {} |
| 999 | |||
| 1000 | Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, | ||
| 844 | VAddr cpu_addr_) | 1001 | VAddr cpu_addr_) |
| 845 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, | 1002 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, |
| 846 | image(MakeImage(runtime.device, info)), | 1003 | runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)), |
| 847 | commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), | 1004 | commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)), |
| 848 | aspect_mask(ImageAspectMask(info.format)) { | 1005 | aspect_mask(ImageAspectMask(info.format)) { |
| 849 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | 1006 | if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { |
| 850 | if (Settings::values.accelerate_astc.GetValue()) { | 1007 | if (Settings::values.accelerate_astc.GetValue()) { |
| 851 | flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; | 1008 | flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; |
| 852 | } else { | 1009 | } else { |
| 853 | flags |= VideoCommon::ImageFlagBits::Converted; | 1010 | flags |= VideoCommon::ImageFlagBits::Converted; |
| 854 | } | 1011 | } |
| 855 | } | 1012 | } |
| 856 | if (runtime.device.HasDebuggingToolAttached()) { | 1013 | if (runtime->device.HasDebuggingToolAttached()) { |
| 857 | image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | 1014 | original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); |
| 858 | } | 1015 | } |
| 859 | static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ | 1016 | static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ |
| 860 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, | 1017 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, |
| 861 | .pNext = nullptr, | 1018 | .pNext = nullptr, |
| 862 | .usage = VK_IMAGE_USAGE_STORAGE_BIT, | 1019 | .usage = VK_IMAGE_USAGE_STORAGE_BIT, |
| 863 | }; | 1020 | }; |
| 864 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | 1021 | current_image = *original_image; |
| 865 | const auto& device = runtime.device.GetLogical(); | 1022 | if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { |
| 1023 | const auto& device = runtime->device.GetLogical(); | ||
| 866 | storage_image_views.reserve(info.resources.levels); | 1024 | storage_image_views.reserve(info.resources.levels); |
| 867 | for (s32 level = 0; level < info.resources.levels; ++level) { | 1025 | for (s32 level = 0; level < info.resources.levels; ++level) { |
| 868 | storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{ | 1026 | storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{ |
| 869 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 1027 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 870 | .pNext = &storage_image_view_usage_create_info, | 1028 | .pNext = &storage_image_view_usage_create_info, |
| 871 | .flags = 0, | 1029 | .flags = 0, |
| 872 | .image = *image, | 1030 | .image = *original_image, |
| 873 | .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY, | 1031 | .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY, |
| 874 | .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, | 1032 | .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, |
| 875 | .components{ | 1033 | .components{ |
| @@ -890,26 +1048,39 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 890 | } | 1048 | } |
| 891 | } | 1049 | } |
| 892 | 1050 | ||
/// Null-image constructor: forwards `params` to VideoCommon::ImageBase and sets up nothing
/// else here (no scheduler, runtime or Vulkan image is assigned by this constructor).
| 1051 | Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {} | ||
| 1052 | |||
| 893 | Image::~Image() = default; | 1053 | Image::~Image() = default; |
| 894 | 1054 | ||
| 895 | void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | 1055 | void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 896 | // TODO: Move this to another API | 1056 | // TODO: Move this to another API |
| 1057 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); | ||
| 1058 | if (is_rescaled) { | ||
| 1059 | ScaleDown(true); | ||
| 1060 | } | ||
| 897 | scheduler->RequestOutsideRenderPassOperationContext(); | 1061 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 898 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); | 1062 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); |
| 899 | const VkBuffer src_buffer = map.buffer; | 1063 | const VkBuffer src_buffer = map.buffer; |
| 900 | const VkImage vk_image = *image; | 1064 | const VkImage vk_image = *original_image; |
| 901 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; | 1065 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; |
| 902 | const bool is_initialized = std::exchange(initialized, true); | 1066 | const bool is_initialized = std::exchange(initialized, true); |
| 903 | scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, | 1067 | scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, |
| 904 | vk_copies](vk::CommandBuffer cmdbuf) { | 1068 | vk_copies](vk::CommandBuffer cmdbuf) { |
| 905 | CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); | 1069 | CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); |
| 906 | }); | 1070 | }); |
| 1071 | if (is_rescaled) { | ||
| 1072 | ScaleUp(); | ||
| 1073 | } | ||
| 907 | } | 1074 | } |
| 908 | 1075 | ||
| 909 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | 1076 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 1077 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); | ||
| 1078 | if (is_rescaled) { | ||
| 1079 | ScaleDown(); | ||
| 1080 | } | ||
| 910 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); | 1081 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); |
| 911 | scheduler->RequestOutsideRenderPassOperationContext(); | 1082 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 912 | scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, | 1083 | scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask, |
| 913 | vk_copies](vk::CommandBuffer cmdbuf) { | 1084 | vk_copies](vk::CommandBuffer cmdbuf) { |
| 914 | const VkImageMemoryBarrier read_barrier{ | 1085 | const VkImageMemoryBarrier read_barrier{ |
| 915 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 1086 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| @@ -959,6 +1130,146 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm | |||
| 959 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | 1130 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, |
| 960 | 0, memory_write_barrier, nullptr, image_write_barrier); | 1131 | 0, memory_write_barrier, nullptr, image_write_barrier); |
| 961 | }); | 1132 | }); |
| 1133 | if (is_rescaled) { | ||
| 1134 | ScaleUp(true); | ||
| 1135 | } | ||
| 1136 | } | ||
| 1137 | |||
| 1138 | bool Image::ScaleUp(bool ignore) { | ||
| 1139 | if (True(flags & ImageFlagBits::Rescaled)) { | ||
| 1140 | return false; | ||
| 1141 | } | ||
| 1142 | ASSERT(info.type != ImageType::Linear); | ||
| 1143 | flags |= ImageFlagBits::Rescaled; | ||
| 1144 | const auto& resolution = runtime->resolution; | ||
| 1145 | if (!resolution.active) { | ||
| 1146 | return false; | ||
| 1147 | } | ||
| 1148 | has_scaled = true; | ||
| 1149 | const auto& device = runtime->device; | ||
| 1150 | if (!scaled_image) { | ||
| 1151 | const bool is_2d = info.type == ImageType::e2D; | ||
| 1152 | const u32 scaled_width = resolution.ScaleUp(info.size.width); | ||
| 1153 | const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; | ||
| 1154 | auto scaled_info = info; | ||
| 1155 | scaled_info.size.width = scaled_width; | ||
| 1156 | scaled_info.size.height = scaled_height; | ||
| 1157 | scaled_image = MakeImage(device, scaled_info); | ||
| 1158 | auto& allocator = runtime->memory_allocator; | ||
| 1159 | scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); | ||
| 1160 | ignore = false; | ||
| 1161 | } | ||
| 1162 | current_image = *scaled_image; | ||
| 1163 | if (ignore) { | ||
| 1164 | return true; | ||
| 1165 | } | ||
| 1166 | |||
| 1167 | if (aspect_mask == 0) { | ||
| 1168 | aspect_mask = ImageAspectMask(info.format); | ||
| 1169 | } | ||
| 1170 | static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; | ||
| 1171 | const PixelFormat format = StorageFormat(info.format); | ||
| 1172 | const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; | ||
| 1173 | const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; | ||
| 1174 | if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { | ||
| 1175 | BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution); | ||
| 1176 | } else { | ||
| 1177 | return BlitScaleHelper(true); | ||
| 1178 | } | ||
| 1179 | return true; | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | bool Image::ScaleDown(bool ignore) { | ||
| 1183 | if (False(flags & ImageFlagBits::Rescaled)) { | ||
| 1184 | return false; | ||
| 1185 | } | ||
| 1186 | ASSERT(info.type != ImageType::Linear); | ||
| 1187 | flags &= ~ImageFlagBits::Rescaled; | ||
| 1188 | const auto& resolution = runtime->resolution; | ||
| 1189 | if (!resolution.active) { | ||
| 1190 | return false; | ||
| 1191 | } | ||
| 1192 | current_image = *original_image; | ||
| 1193 | if (ignore) { | ||
| 1194 | return true; | ||
| 1195 | } | ||
| 1196 | if (aspect_mask == 0) { | ||
| 1197 | aspect_mask = ImageAspectMask(info.format); | ||
| 1198 | } | ||
| 1199 | static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; | ||
| 1200 | const PixelFormat format = StorageFormat(info.format); | ||
| 1201 | const auto& device = runtime->device; | ||
| 1202 | const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; | ||
| 1203 | const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; | ||
| 1204 | if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { | ||
| 1205 | BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false); | ||
| 1206 | } else { | ||
| 1207 | return BlitScaleHelper(false); | ||
| 1208 | } | ||
| 1209 | return true; | ||
| 1210 | } | ||
| 1211 | |||
| 1212 | bool Image::BlitScaleHelper(bool scale_up) { | ||
| 1213 | using namespace VideoCommon; | ||
| 1214 | static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; | ||
| 1215 | const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT}; | ||
| 1216 | const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)}; | ||
| 1217 | const auto operation = is_bilinear ? Tegra::Engines::Fermi2D::Filter::Bilinear | ||
| 1218 | : Tegra::Engines::Fermi2D::Filter::Point; | ||
| 1219 | |||
| 1220 | const bool is_2d = info.type == ImageType::e2D; | ||
| 1221 | const auto& resolution = runtime->resolution; | ||
| 1222 | const u32 scaled_width = resolution.ScaleUp(info.size.width); | ||
| 1223 | const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; | ||
| 1224 | std::unique_ptr<ImageView>& blit_view = scale_up ? scale_view : normal_view; | ||
| 1225 | std::unique_ptr<Framebuffer>& blit_framebuffer = | ||
| 1226 | scale_up ? scale_framebuffer : normal_framebuffer; | ||
| 1227 | if (!blit_view) { | ||
| 1228 | const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); | ||
| 1229 | blit_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this); | ||
| 1230 | } | ||
| 1231 | |||
| 1232 | const u32 src_width = scale_up ? info.size.width : scaled_width; | ||
| 1233 | const u32 src_height = scale_up ? info.size.height : scaled_height; | ||
| 1234 | const u32 dst_width = scale_up ? scaled_width : info.size.width; | ||
| 1235 | const u32 dst_height = scale_up ? scaled_height : info.size.height; | ||
| 1236 | const Region2D src_region{ | ||
| 1237 | .start = {0, 0}, | ||
| 1238 | .end = {static_cast<s32>(src_width), static_cast<s32>(src_height)}, | ||
| 1239 | }; | ||
| 1240 | const Region2D dst_region{ | ||
| 1241 | .start = {0, 0}, | ||
| 1242 | .end = {static_cast<s32>(dst_width), static_cast<s32>(dst_height)}, | ||
| 1243 | }; | ||
| 1244 | const VkExtent2D extent{ | ||
| 1245 | .width = std::max(scaled_width, info.size.width), | ||
| 1246 | .height = std::max(scaled_height, info.size.width), | ||
| 1247 | }; | ||
| 1248 | |||
| 1249 | auto* view_ptr = blit_view.get(); | ||
| 1250 | if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { | ||
| 1251 | if (!blit_framebuffer) { | ||
| 1252 | blit_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent); | ||
| 1253 | } | ||
| 1254 | const auto color_view = blit_view->Handle(Shader::TextureType::Color2D); | ||
| 1255 | |||
| 1256 | runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region, | ||
| 1257 | src_region, operation, BLIT_OPERATION); | ||
| 1258 | } else if (!runtime->device.IsBlitDepthStencilSupported() && | ||
| 1259 | aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | ||
| 1260 | if (!blit_framebuffer) { | ||
| 1261 | blit_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent); | ||
| 1262 | } | ||
| 1263 | runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(), | ||
| 1264 | blit_view->StencilView(), dst_region, | ||
| 1265 | src_region, operation, BLIT_OPERATION); | ||
| 1266 | } else { | ||
| 1267 | // TODO: Use helper blits where applicable | ||
| 1268 | flags &= ~ImageFlagBits::Rescaled; | ||
| 1269 | LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", info.format); | ||
| 1270 | return false; | ||
| 1271 | } | ||
| 1272 | return true; | ||
| 962 | } | 1273 | } |
| 963 | 1274 | ||
| 964 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, | 1275 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, |
| @@ -1052,9 +1363,11 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | |||
| 1052 | : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, | 1363 | : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, |
| 1053 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} | 1364 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} |
| 1054 | 1365 | ||
/// Null-view constructor: ignores the runtime argument and forwards `params` to
/// VideoCommon::ImageViewBase.
| 1055 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) | 1366 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params) |
| 1056 | : VideoCommon::ImageViewBase{params} {} | 1367 | : VideoCommon::ImageViewBase{params} {} |
| 1057 | 1368 | ||
| 1369 | ImageView::~ImageView() = default; | ||
| 1370 | |||
| 1058 | VkImageView ImageView::DepthView() { | 1371 | VkImageView ImageView::DepthView() { |
| 1059 | if (depth_view) { | 1372 | if (depth_view) { |
| 1060 | return *depth_view; | 1373 | return *depth_view; |
| @@ -1137,7 +1450,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t | |||
| 1137 | LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); | 1450 | LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); |
| 1138 | } | 1451 | } |
| 1139 | // Some games have samplers with garbage. Sanitize them here. | 1452 | // Some games have samplers with garbage. Sanitize them here. |
| 1140 | const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); | 1453 | const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); |
| 1454 | |||
| 1141 | sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ | 1455 | sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ |
| 1142 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, | 1456 | .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, |
| 1143 | .pNext = pnext, | 1457 | .pNext = pnext, |
| @@ -1162,7 +1476,29 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t | |||
| 1162 | } | 1476 | } |
| 1163 | 1477 | ||
| 1164 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, | 1478 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, |
| 1165 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { | 1479 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) |
| 1480 | : render_area{VkExtent2D{ | ||
| 1481 | .width = key.size.width, | ||
| 1482 | .height = key.size.height, | ||
| 1483 | }} { | ||
| 1484 | CreateFramebuffer(runtime, color_buffers, depth_buffer); | ||
| 1485 | if (runtime.device.HasDebuggingToolAttached()) { | ||
| 1486 | framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); | ||
| 1487 | } | ||
| 1488 | } | ||
| 1489 | |||
| 1490 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, | ||
| 1491 | ImageView* depth_buffer, VkExtent2D extent) | ||
| 1492 | : render_area{extent} { | ||
| 1493 | std::array<ImageView*, NUM_RT> color_buffers{color_buffer}; | ||
| 1494 | CreateFramebuffer(runtime, color_buffers, depth_buffer); | ||
| 1495 | } | ||
| 1496 | |||
| 1497 | Framebuffer::~Framebuffer() = default; | ||
| 1498 | |||
| 1499 | void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, | ||
| 1500 | std::span<ImageView*, NUM_RT> color_buffers, | ||
| 1501 | ImageView* depth_buffer) { | ||
| 1166 | std::vector<VkImageView> attachments; | 1502 | std::vector<VkImageView> attachments; |
| 1167 | RenderPassKey renderpass_key{}; | 1503 | RenderPassKey renderpass_key{}; |
| 1168 | s32 num_layers = 1; | 1504 | s32 num_layers = 1; |
| @@ -1200,10 +1536,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1200 | 1536 | ||
| 1201 | renderpass = runtime.render_pass_cache.Get(renderpass_key); | 1537 | renderpass = runtime.render_pass_cache.Get(renderpass_key); |
| 1202 | 1538 | ||
| 1203 | render_area = VkExtent2D{ | ||
| 1204 | .width = key.size.width, | ||
| 1205 | .height = key.size.height, | ||
| 1206 | }; | ||
| 1207 | num_color_buffers = static_cast<u32>(num_colors); | 1539 | num_color_buffers = static_cast<u32>(num_colors); |
| 1208 | framebuffer = runtime.device.GetLogical().CreateFramebuffer({ | 1540 | framebuffer = runtime.device.GetLogical().CreateFramebuffer({ |
| 1209 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | 1541 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, |
| @@ -1212,13 +1544,10 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1212 | .renderPass = renderpass, | 1544 | .renderPass = renderpass, |
| 1213 | .attachmentCount = static_cast<u32>(attachments.size()), | 1545 | .attachmentCount = static_cast<u32>(attachments.size()), |
| 1214 | .pAttachments = attachments.data(), | 1546 | .pAttachments = attachments.data(), |
| 1215 | .width = key.size.width, | 1547 | .width = render_area.width, |
| 1216 | .height = key.size.height, | 1548 | .height = render_area.height, |
| 1217 | .layers = static_cast<u32>(std::max(num_layers, 1)), | 1549 | .layers = static_cast<u32>(std::max(num_layers, 1)), |
| 1218 | }); | 1550 | }); |
| 1219 | if (runtime.device.HasDebuggingToolAttached()) { | ||
| 1220 | framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); | ||
| 1221 | } | ||
| 1222 | } | 1551 | } |
| 1223 | 1552 | ||
| 1224 | void TextureCacheRuntime::AccelerateImageUpload( | 1553 | void TextureCacheRuntime::AccelerateImageUpload( |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index b09c468e4..ff28b4e96 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -13,6 +13,10 @@ | |||
| 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 15 | ||
| 16 | namespace Settings { | ||
| 17 | struct ResolutionScalingInfo; | ||
| 18 | } | ||
| 19 | |||
| 16 | namespace Vulkan { | 20 | namespace Vulkan { |
| 17 | 21 | ||
| 18 | using VideoCommon::ImageId; | 22 | using VideoCommon::ImageId; |
| @@ -31,14 +35,14 @@ class RenderPassCache; | |||
| 31 | class StagingBufferPool; | 35 | class StagingBufferPool; |
| 32 | class VKScheduler; | 36 | class VKScheduler; |
| 33 | 37 | ||
| 34 | struct TextureCacheRuntime { | 38 | class TextureCacheRuntime { |
| 35 | const Device& device; | 39 | public: |
| 36 | VKScheduler& scheduler; | 40 | explicit TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_, |
| 37 | MemoryAllocator& memory_allocator; | 41 | MemoryAllocator& memory_allocator_, |
| 38 | StagingBufferPool& staging_buffer_pool; | 42 | StagingBufferPool& staging_buffer_pool_, |
| 39 | BlitImageHelper& blit_image_helper; | 43 | BlitImageHelper& blit_image_helper_, |
| 40 | ASTCDecoderPass& astc_decoder_pass; | 44 | ASTCDecoderPass& astc_decoder_pass_, |
| 41 | RenderPassCache& render_pass_cache; | 45 | RenderPassCache& render_pass_cache_); |
| 42 | 46 | ||
| 43 | void Finish(); | 47 | void Finish(); |
| 44 | 48 | ||
| @@ -46,6 +50,10 @@ struct TextureCacheRuntime { | |||
| 46 | 50 | ||
| 47 | StagingBufferRef DownloadStagingBuffer(size_t size); | 51 | StagingBufferRef DownloadStagingBuffer(size_t size); |
| 48 | 52 | ||
| 53 | void TickFrame(); | ||
| 54 | |||
| 55 | u64 GetDeviceLocalMemory() const; | ||
| 56 | |||
| 49 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 57 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 50 | const Region2D& dst_region, const Region2D& src_region, | 58 | const Region2D& dst_region, const Region2D& src_region, |
| 51 | Tegra::Engines::Fermi2D::Filter filter, | 59 | Tegra::Engines::Fermi2D::Filter filter, |
| @@ -53,7 +61,7 @@ struct TextureCacheRuntime { | |||
| 53 | 61 | ||
| 54 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | 62 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); |
| 55 | 63 | ||
| 56 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); | 64 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); |
| 57 | 65 | ||
| 58 | bool CanAccelerateImageUpload(Image&) const noexcept { | 66 | bool CanAccelerateImageUpload(Image&) const noexcept { |
| 59 | return false; | 67 | return false; |
| @@ -74,13 +82,21 @@ struct TextureCacheRuntime { | |||
| 74 | return true; | 82 | return true; |
| 75 | } | 83 | } |
| 76 | 84 | ||
| 77 | u64 GetDeviceLocalMemory() const; | 85 | const Device& device; |
| 86 | VKScheduler& scheduler; | ||
| 87 | MemoryAllocator& memory_allocator; | ||
| 88 | StagingBufferPool& staging_buffer_pool; | ||
| 89 | BlitImageHelper& blit_image_helper; | ||
| 90 | ASTCDecoderPass& astc_decoder_pass; | ||
| 91 | RenderPassCache& render_pass_cache; | ||
| 92 | const Settings::ResolutionScalingInfo& resolution; | ||
| 78 | }; | 93 | }; |
| 79 | 94 | ||
| 80 | class Image : public VideoCommon::ImageBase { | 95 | class Image : public VideoCommon::ImageBase { |
| 81 | public: | 96 | public: |
| 82 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | 97 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, |
| 83 | VAddr cpu_addr); | 98 | VAddr cpu_addr); |
| 99 | explicit Image(const VideoCommon::NullImageParams&); | ||
| 84 | 100 | ||
| 85 | ~Image(); | 101 | ~Image(); |
| 86 | 102 | ||
| @@ -97,7 +113,7 @@ public: | |||
| 97 | std::span<const VideoCommon::BufferImageCopy> copies); | 113 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 98 | 114 | ||
| 99 | [[nodiscard]] VkImage Handle() const noexcept { | 115 | [[nodiscard]] VkImage Handle() const noexcept { |
| 100 | return *image; | 116 | return current_image; |
| 101 | } | 117 | } |
| 102 | 118 | ||
| 103 | [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { | 119 | [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { |
| @@ -113,14 +129,30 @@ public: | |||
| 113 | return std::exchange(initialized, true); | 129 | return std::exchange(initialized, true); |
| 114 | } | 130 | } |
| 115 | 131 | ||
| 132 | bool ScaleUp(bool ignore = false); | ||
| 133 | |||
| 134 | bool ScaleDown(bool ignore = false); | ||
| 135 | |||
| 116 | private: | 136 | private: |
| 117 | VKScheduler* scheduler; | 137 | bool BlitScaleHelper(bool scale_up); |
| 118 | vk::Image image; | 138 | |
| 139 | VKScheduler* scheduler{}; | ||
| 140 | TextureCacheRuntime* runtime{}; | ||
| 141 | |||
| 142 | vk::Image original_image; | ||
| 119 | MemoryCommit commit; | 143 | MemoryCommit commit; |
| 120 | vk::ImageView image_view; | ||
| 121 | std::vector<vk::ImageView> storage_image_views; | 144 | std::vector<vk::ImageView> storage_image_views; |
| 122 | VkImageAspectFlags aspect_mask = 0; | 145 | VkImageAspectFlags aspect_mask = 0; |
| 123 | bool initialized = false; | 146 | bool initialized = false; |
| 147 | vk::Image scaled_image{}; | ||
| 148 | MemoryCommit scaled_commit{}; | ||
| 149 | VkImage current_image{}; | ||
| 150 | |||
| 151 | std::unique_ptr<Framebuffer> scale_framebuffer; | ||
| 152 | std::unique_ptr<ImageView> scale_view; | ||
| 153 | |||
| 154 | std::unique_ptr<Framebuffer> normal_framebuffer; | ||
| 155 | std::unique_ptr<ImageView> normal_view; | ||
| 124 | }; | 156 | }; |
| 125 | 157 | ||
| 126 | class ImageView : public VideoCommon::ImageViewBase { | 158 | class ImageView : public VideoCommon::ImageViewBase { |
| @@ -128,7 +160,15 @@ public: | |||
| 128 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); | 160 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); |
| 129 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, | 161 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, |
| 130 | const VideoCommon::ImageViewInfo&, GPUVAddr); | 162 | const VideoCommon::ImageViewInfo&, GPUVAddr); |
| 131 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); | 163 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&); |
| 164 | |||
| 165 | ~ImageView(); | ||
| 166 | |||
| 167 | ImageView(const ImageView&) = delete; | ||
| 168 | ImageView& operator=(const ImageView&) = delete; | ||
| 169 | |||
| 170 | ImageView(ImageView&&) = default; | ||
| 171 | ImageView& operator=(ImageView&&) = default; | ||
| 132 | 172 | ||
| 133 | [[nodiscard]] VkImageView DepthView(); | 173 | [[nodiscard]] VkImageView DepthView(); |
| 134 | 174 | ||
| @@ -197,9 +237,23 @@ private: | |||
| 197 | 237 | ||
| 198 | class Framebuffer { | 238 | class Framebuffer { |
| 199 | public: | 239 | public: |
| 200 | explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, | 240 | explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, |
| 201 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key); | 241 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key); |
| 202 | 242 | ||
| 243 | explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, | ||
| 244 | ImageView* depth_buffer, VkExtent2D extent); | ||
| 245 | |||
| 246 | ~Framebuffer(); | ||
| 247 | |||
| 248 | Framebuffer(const Framebuffer&) = delete; | ||
| 249 | Framebuffer& operator=(const Framebuffer&) = delete; | ||
| 250 | |||
| 251 | Framebuffer(Framebuffer&&) = default; | ||
| 252 | Framebuffer& operator=(Framebuffer&&) = default; | ||
| 253 | |||
| 254 | void CreateFramebuffer(TextureCacheRuntime& runtime, | ||
| 255 | std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer); | ||
| 256 | |||
| 203 | [[nodiscard]] VkFramebuffer Handle() const noexcept { | 257 | [[nodiscard]] VkFramebuffer Handle() const noexcept { |
| 204 | return *framebuffer; | 258 | return *framebuffer; |
| 205 | } | 259 | } |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index eb1746265..58d262446 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -279,6 +279,80 @@ bool IsPixelFormatSRGB(PixelFormat format) { | |||
| 279 | } | 279 | } |
| 280 | } | 280 | } |
| 281 | 281 | ||
| 282 | bool IsPixelFormatInteger(PixelFormat format) { | ||
| 283 | switch (format) { | ||
| 284 | case PixelFormat::A8B8G8R8_SINT: | ||
| 285 | case PixelFormat::A8B8G8R8_UINT: | ||
| 286 | case PixelFormat::A2B10G10R10_UINT: | ||
| 287 | case PixelFormat::R8_SINT: | ||
| 288 | case PixelFormat::R8_UINT: | ||
| 289 | case PixelFormat::R16G16B16A16_SINT: | ||
| 290 | case PixelFormat::R16G16B16A16_UINT: | ||
| 291 | case PixelFormat::R32G32B32A32_UINT: | ||
| 292 | case PixelFormat::R32G32B32A32_SINT: | ||
| 293 | case PixelFormat::R32G32_SINT: | ||
| 294 | case PixelFormat::R16_UINT: | ||
| 295 | case PixelFormat::R16_SINT: | ||
| 296 | case PixelFormat::R16G16_UINT: | ||
| 297 | case PixelFormat::R16G16_SINT: | ||
| 298 | case PixelFormat::R8G8_SINT: | ||
| 299 | case PixelFormat::R8G8_UINT: | ||
| 300 | case PixelFormat::R32G32_UINT: | ||
| 301 | case PixelFormat::R32_UINT: | ||
| 302 | case PixelFormat::R32_SINT: | ||
| 303 | return true; | ||
| 304 | default: | ||
| 305 | return false; | ||
| 306 | } | ||
| 307 | } | ||
| 308 | |||
| 309 | bool IsPixelFormatSignedInteger(PixelFormat format) { | ||
| 310 | switch (format) { | ||
| 311 | case PixelFormat::A8B8G8R8_SINT: | ||
| 312 | case PixelFormat::R8_SINT: | ||
| 313 | case PixelFormat::R16G16B16A16_SINT: | ||
| 314 | case PixelFormat::R32G32B32A32_SINT: | ||
| 315 | case PixelFormat::R32G32_SINT: | ||
| 316 | case PixelFormat::R16_SINT: | ||
| 317 | case PixelFormat::R16G16_SINT: | ||
| 318 | case PixelFormat::R8G8_SINT: | ||
| 319 | case PixelFormat::R32_SINT: | ||
| 320 | return true; | ||
| 321 | default: | ||
| 322 | return false; | ||
| 323 | } | ||
| 324 | } | ||
| 325 | |||
| 326 | size_t PixelComponentSizeBitsInteger(PixelFormat format) { | ||
| 327 | switch (format) { | ||
| 328 | case PixelFormat::A8B8G8R8_SINT: | ||
| 329 | case PixelFormat::A8B8G8R8_UINT: | ||
| 330 | case PixelFormat::R8_SINT: | ||
| 331 | case PixelFormat::R8_UINT: | ||
| 332 | case PixelFormat::R8G8_SINT: | ||
| 333 | case PixelFormat::R8G8_UINT: | ||
| 334 | return 8; | ||
| 335 | case PixelFormat::A2B10G10R10_UINT: | ||
| 336 | return 10; | ||
| 337 | case PixelFormat::R16G16B16A16_SINT: | ||
| 338 | case PixelFormat::R16G16B16A16_UINT: | ||
| 339 | case PixelFormat::R16_UINT: | ||
| 340 | case PixelFormat::R16_SINT: | ||
| 341 | case PixelFormat::R16G16_UINT: | ||
| 342 | case PixelFormat::R16G16_SINT: | ||
| 343 | return 16; | ||
| 344 | case PixelFormat::R32G32B32A32_UINT: | ||
| 345 | case PixelFormat::R32G32B32A32_SINT: | ||
| 346 | case PixelFormat::R32G32_SINT: | ||
| 347 | case PixelFormat::R32G32_UINT: | ||
| 348 | case PixelFormat::R32_UINT: | ||
| 349 | case PixelFormat::R32_SINT: | ||
| 350 | return 32; | ||
| 351 | default: | ||
| 352 | return 0; | ||
| 353 | } | ||
| 354 | } | ||
| 355 | |||
| 282 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { | 356 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { |
| 283 | return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; | 357 | return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; |
| 284 | } | 358 | } |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 1503db81f..2ce7c7d33 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -460,6 +460,12 @@ bool IsPixelFormatASTC(PixelFormat format); | |||
| 460 | 460 | ||
| 461 | bool IsPixelFormatSRGB(PixelFormat format); | 461 | bool IsPixelFormatSRGB(PixelFormat format); |
| 462 | 462 | ||
| 463 | bool IsPixelFormatInteger(PixelFormat format); | ||
| 464 | |||
| 465 | bool IsPixelFormatSignedInteger(PixelFormat format); | ||
| 466 | |||
| 467 | size_t PixelComponentSizeBitsInteger(PixelFormat format); | ||
| 468 | |||
| 463 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); | 469 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); |
| 464 | 470 | ||
| 465 | u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format); | 471 | u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format); |
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 6052d148a..3db2fdf34 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp | |||
| @@ -60,15 +60,17 @@ namespace { | |||
| 60 | ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) | 60 | ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) |
| 61 | : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, | 61 | : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, |
| 62 | unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, | 62 | unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, |
| 63 | converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_}, | 63 | converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{}, |
| 64 | cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, | 64 | has_scaled{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, |
| 65 | mip_level_offsets{CalculateMipLevelOffsets(info)} { | 65 | cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} { |
| 66 | if (info.type == ImageType::e3D) { | 66 | if (info.type == ImageType::e3D) { |
| 67 | slice_offsets = CalculateSliceOffsets(info); | 67 | slice_offsets = CalculateSliceOffsets(info); |
| 68 | slice_subresources = CalculateSliceSubresources(info); | 68 | slice_subresources = CalculateSliceSubresources(info); |
| 69 | } | 69 | } |
| 70 | } | 70 | } |
| 71 | 71 | ||
| 72 | ImageBase::ImageBase(const NullImageParams&) {} | ||
| 73 | |||
| 72 | ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_) | 74 | ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_) |
| 73 | : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} | 75 | : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} |
| 74 | 76 | ||
| @@ -254,6 +256,8 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i | |||
| 254 | } | 256 | } |
| 255 | lhs.aliased_images.push_back(std::move(lhs_alias)); | 257 | lhs.aliased_images.push_back(std::move(lhs_alias)); |
| 256 | rhs.aliased_images.push_back(std::move(rhs_alias)); | 258 | rhs.aliased_images.push_back(std::move(rhs_alias)); |
| 259 | lhs.flags &= ~ImageFlagBits::IsRescalable; | ||
| 260 | rhs.flags &= ~ImageFlagBits::IsRescalable; | ||
| 257 | } | 261 | } |
| 258 | 262 | ||
| 259 | } // namespace VideoCommon | 263 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 0c17a791b..89c111c00 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -33,6 +33,11 @@ enum class ImageFlagBits : u32 { | |||
| 33 | ///< garbage collection priority | 33 | ///< garbage collection priority |
| 34 | Alias = 1 << 11, ///< This image has aliases and has priority on garbage | 34 | Alias = 1 << 11, ///< This image has aliases and has priority on garbage |
| 35 | ///< collection | 35 | ///< collection |
| 36 | |||
| 37 | // Rescaler | ||
| 38 | Rescaled = 1 << 12, | ||
| 39 | CheckingRescalable = 1 << 13, | ||
| 40 | IsRescalable = 1 << 14, | ||
| 36 | }; | 41 | }; |
| 37 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | 42 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) |
| 38 | 43 | ||
| @@ -43,8 +48,11 @@ struct AliasedImage { | |||
| 43 | ImageId id; | 48 | ImageId id; |
| 44 | }; | 49 | }; |
| 45 | 50 | ||
| 51 | struct NullImageParams {}; | ||
| 52 | |||
| 46 | struct ImageBase { | 53 | struct ImageBase { |
| 47 | explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | 54 | explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); |
| 55 | explicit ImageBase(const NullImageParams&); | ||
| 48 | 56 | ||
| 49 | [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept; | 57 | [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept; |
| 50 | 58 | ||
| @@ -68,11 +76,18 @@ struct ImageBase { | |||
| 68 | void CheckBadOverlapState(); | 76 | void CheckBadOverlapState(); |
| 69 | void CheckAliasState(); | 77 | void CheckAliasState(); |
| 70 | 78 | ||
| 79 | bool HasScaled() const { | ||
| 80 | return has_scaled; | ||
| 81 | } | ||
| 82 | |||
| 71 | ImageInfo info; | 83 | ImageInfo info; |
| 72 | 84 | ||
| 73 | u32 guest_size_bytes = 0; | 85 | u32 guest_size_bytes = 0; |
| 74 | u32 unswizzled_size_bytes = 0; | 86 | u32 unswizzled_size_bytes = 0; |
| 75 | u32 converted_size_bytes = 0; | 87 | u32 converted_size_bytes = 0; |
| 88 | u32 scale_rating = 0; | ||
| 89 | u64 scale_tick = 0; | ||
| 90 | bool has_scaled = false; | ||
| 76 | ImageFlagBits flags = ImageFlagBits::CpuModified; | 91 | ImageFlagBits flags = ImageFlagBits::CpuModified; |
| 77 | 92 | ||
| 78 | GPUVAddr gpu_addr = 0; | 93 | GPUVAddr gpu_addr = 0; |
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 64fd7010a..afb94082b 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp | |||
| @@ -16,6 +16,7 @@ namespace VideoCommon { | |||
| 16 | using Tegra::Texture::TextureType; | 16 | using Tegra::Texture::TextureType; |
| 17 | using Tegra::Texture::TICEntry; | 17 | using Tegra::Texture::TICEntry; |
| 18 | using VideoCore::Surface::PixelFormat; | 18 | using VideoCore::Surface::PixelFormat; |
| 19 | using VideoCore::Surface::SurfaceType; | ||
| 19 | 20 | ||
| 20 | ImageInfo::ImageInfo(const TICEntry& config) noexcept { | 21 | ImageInfo::ImageInfo(const TICEntry& config) noexcept { |
| 21 | format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, | 22 | format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, |
| @@ -31,6 +32,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { | |||
| 31 | .depth = config.block_depth, | 32 | .depth = config.block_depth, |
| 32 | }; | 33 | }; |
| 33 | } | 34 | } |
| 35 | rescaleable = false; | ||
| 34 | tile_width_spacing = config.tile_width_spacing; | 36 | tile_width_spacing = config.tile_width_spacing; |
| 35 | if (config.texture_type != TextureType::Texture2D && | 37 | if (config.texture_type != TextureType::Texture2D && |
| 36 | config.texture_type != TextureType::Texture2DNoMipmap) { | 38 | config.texture_type != TextureType::Texture2DNoMipmap) { |
| @@ -41,6 +43,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { | |||
| 41 | ASSERT(config.BaseLayer() == 0); | 43 | ASSERT(config.BaseLayer() == 0); |
| 42 | type = ImageType::e1D; | 44 | type = ImageType::e1D; |
| 43 | size.width = config.Width(); | 45 | size.width = config.Width(); |
| 46 | resources.layers = 1; | ||
| 44 | break; | 47 | break; |
| 45 | case TextureType::Texture1DArray: | 48 | case TextureType::Texture1DArray: |
| 46 | UNIMPLEMENTED_IF(config.BaseLayer() != 0); | 49 | UNIMPLEMENTED_IF(config.BaseLayer() != 0); |
| @@ -52,12 +55,14 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { | |||
| 52 | case TextureType::Texture2DNoMipmap: | 55 | case TextureType::Texture2DNoMipmap: |
| 53 | ASSERT(config.Depth() == 1); | 56 | ASSERT(config.Depth() == 1); |
| 54 | type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D; | 57 | type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D; |
| 58 | rescaleable = !config.IsPitchLinear(); | ||
| 55 | size.width = config.Width(); | 59 | size.width = config.Width(); |
| 56 | size.height = config.Height(); | 60 | size.height = config.Height(); |
| 57 | resources.layers = config.BaseLayer() + 1; | 61 | resources.layers = config.BaseLayer() + 1; |
| 58 | break; | 62 | break; |
| 59 | case TextureType::Texture2DArray: | 63 | case TextureType::Texture2DArray: |
| 60 | type = ImageType::e2D; | 64 | type = ImageType::e2D; |
| 65 | rescaleable = true; | ||
| 61 | size.width = config.Width(); | 66 | size.width = config.Width(); |
| 62 | size.height = config.Height(); | 67 | size.height = config.Height(); |
| 63 | resources.layers = config.BaseLayer() + config.Depth(); | 68 | resources.layers = config.BaseLayer() + config.Depth(); |
| @@ -82,10 +87,12 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { | |||
| 82 | size.width = config.Width(); | 87 | size.width = config.Width(); |
| 83 | size.height = config.Height(); | 88 | size.height = config.Height(); |
| 84 | size.depth = config.Depth(); | 89 | size.depth = config.Depth(); |
| 90 | resources.layers = 1; | ||
| 85 | break; | 91 | break; |
| 86 | case TextureType::Texture1DBuffer: | 92 | case TextureType::Texture1DBuffer: |
| 87 | type = ImageType::Buffer; | 93 | type = ImageType::Buffer; |
| 88 | size.width = config.Width(); | 94 | size.width = config.Width(); |
| 95 | resources.layers = 1; | ||
| 89 | break; | 96 | break; |
| 90 | default: | 97 | default: |
| 91 | UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); | 98 | UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); |
| @@ -95,12 +102,16 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { | |||
| 95 | // FIXME: Call this without passing *this | 102 | // FIXME: Call this without passing *this |
| 96 | layer_stride = CalculateLayerStride(*this); | 103 | layer_stride = CalculateLayerStride(*this); |
| 97 | maybe_unaligned_layer_stride = CalculateLayerSize(*this); | 104 | maybe_unaligned_layer_stride = CalculateLayerSize(*this); |
| 105 | rescaleable &= (block.depth == 0) && resources.levels == 1; | ||
| 106 | rescaleable &= size.height > 256 || GetFormatType(format) != SurfaceType::ColorTexture; | ||
| 107 | downscaleable = size.height > 512; | ||
| 98 | } | 108 | } |
| 99 | } | 109 | } |
| 100 | 110 | ||
| 101 | ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept { | 111 | ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept { |
| 102 | const auto& rt = regs.rt[index]; | 112 | const auto& rt = regs.rt[index]; |
| 103 | format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format); | 113 | format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format); |
| 114 | rescaleable = false; | ||
| 104 | if (rt.tile_mode.is_pitch_linear) { | 115 | if (rt.tile_mode.is_pitch_linear) { |
| 105 | ASSERT(rt.tile_mode.is_3d == 0); | 116 | ASSERT(rt.tile_mode.is_3d == 0); |
| 106 | type = ImageType::Linear; | 117 | type = ImageType::Linear; |
| @@ -126,6 +137,9 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) | |||
| 126 | type = ImageType::e3D; | 137 | type = ImageType::e3D; |
| 127 | size.depth = rt.depth; | 138 | size.depth = rt.depth; |
| 128 | } else { | 139 | } else { |
| 140 | rescaleable = block.depth == 0; | ||
| 141 | rescaleable &= size.height > 256; | ||
| 142 | downscaleable = size.height > 512; | ||
| 129 | type = ImageType::e2D; | 143 | type = ImageType::e2D; |
| 130 | resources.layers = rt.depth; | 144 | resources.layers = rt.depth; |
| 131 | } | 145 | } |
| @@ -135,6 +149,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { | |||
| 135 | format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format); | 149 | format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format); |
| 136 | size.width = regs.zeta_width; | 150 | size.width = regs.zeta_width; |
| 137 | size.height = regs.zeta_height; | 151 | size.height = regs.zeta_height; |
| 152 | rescaleable = false; | ||
| 138 | resources.levels = 1; | 153 | resources.levels = 1; |
| 139 | layer_stride = regs.zeta.layer_stride * 4; | 154 | layer_stride = regs.zeta.layer_stride * 4; |
| 140 | maybe_unaligned_layer_stride = layer_stride; | 155 | maybe_unaligned_layer_stride = layer_stride; |
| @@ -153,6 +168,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { | |||
| 153 | type = ImageType::e3D; | 168 | type = ImageType::e3D; |
| 154 | size.depth = regs.zeta_depth; | 169 | size.depth = regs.zeta_depth; |
| 155 | } else { | 170 | } else { |
| 171 | rescaleable = block.depth == 0; | ||
| 172 | downscaleable = size.height > 512; | ||
| 156 | type = ImageType::e2D; | 173 | type = ImageType::e2D; |
| 157 | resources.layers = regs.zeta_depth; | 174 | resources.layers = regs.zeta_depth; |
| 158 | } | 175 | } |
| @@ -161,6 +178,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { | |||
| 161 | ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { | 178 | ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { |
| 162 | UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); | 179 | UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); |
| 163 | format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); | 180 | format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); |
| 181 | rescaleable = false; | ||
| 164 | if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) { | 182 | if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) { |
| 165 | type = ImageType::Linear; | 183 | type = ImageType::Linear; |
| 166 | size = Extent3D{ | 184 | size = Extent3D{ |
| @@ -171,6 +189,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { | |||
| 171 | pitch = config.pitch; | 189 | pitch = config.pitch; |
| 172 | } else { | 190 | } else { |
| 173 | type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D; | 191 | type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D; |
| 192 | |||
| 174 | block = Extent3D{ | 193 | block = Extent3D{ |
| 175 | .width = config.block_width, | 194 | .width = config.block_width, |
| 176 | .height = config.block_height, | 195 | .height = config.block_height, |
| @@ -183,6 +202,9 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { | |||
| 183 | .height = config.height, | 202 | .height = config.height, |
| 184 | .depth = 1, | 203 | .depth = 1, |
| 185 | }; | 204 | }; |
| 205 | rescaleable = block.depth == 0; | ||
| 206 | rescaleable &= size.height > 256; | ||
| 207 | downscaleable = size.height > 512; | ||
| 186 | } | 208 | } |
| 187 | } | 209 | } |
| 188 | 210 | ||
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 5049fc36e..5932dcaba 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h | |||
| @@ -15,7 +15,7 @@ using Tegra::Texture::TICEntry; | |||
| 15 | using VideoCore::Surface::PixelFormat; | 15 | using VideoCore::Surface::PixelFormat; |
| 16 | 16 | ||
| 17 | struct ImageInfo { | 17 | struct ImageInfo { |
| 18 | explicit ImageInfo() = default; | 18 | ImageInfo() = default; |
| 19 | explicit ImageInfo(const TICEntry& config) noexcept; | 19 | explicit ImageInfo(const TICEntry& config) noexcept; |
| 20 | explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; | 20 | explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; |
| 21 | explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; | 21 | explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; |
| @@ -33,6 +33,8 @@ struct ImageInfo { | |||
| 33 | u32 maybe_unaligned_layer_stride = 0; | 33 | u32 maybe_unaligned_layer_stride = 0; |
| 34 | u32 num_samples = 1; | 34 | u32 num_samples = 1; |
| 35 | u32 tile_width_spacing = 0; | 35 | u32 tile_width_spacing = 0; |
| 36 | bool rescaleable = false; | ||
| 37 | bool downscaleable = false; | ||
| 36 | }; | 38 | }; |
| 37 | 39 | ||
| 38 | } // namespace VideoCommon | 40 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 450becbeb..c7b4fc231 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp | |||
| @@ -37,14 +37,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i | |||
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) | 39 | ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) |
| 40 | : format{info.format}, type{ImageViewType::Buffer}, size{ | 40 | : image_id{NULL_IMAGE_ID}, format{info.format}, type{ImageViewType::Buffer}, |
| 41 | .width = info.size.width, | 41 | size{ |
| 42 | .height = 1, | 42 | .width = info.size.width, |
| 43 | .depth = 1, | 43 | .height = 1, |
| 44 | } { | 44 | .depth = 1, |
| 45 | } { | ||
| 45 | ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); | 46 | ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); |
| 46 | } | 47 | } |
| 47 | 48 | ||
| 48 | ImageViewBase::ImageViewBase(const NullImageParams&) {} | 49 | ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {} |
| 49 | 50 | ||
| 50 | } // namespace VideoCommon | 51 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 903f715c5..9c24c5359 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h | |||
| @@ -15,7 +15,7 @@ using VideoCore::Surface::PixelFormat; | |||
| 15 | struct ImageViewInfo; | 15 | struct ImageViewInfo; |
| 16 | struct ImageInfo; | 16 | struct ImageInfo; |
| 17 | 17 | ||
| 18 | struct NullImageParams {}; | 18 | struct NullImageViewParams {}; |
| 19 | 19 | ||
| 20 | enum class ImageViewFlagBits : u16 { | 20 | enum class ImageViewFlagBits : u16 { |
| 21 | PreemtiveDownload = 1 << 0, | 21 | PreemtiveDownload = 1 << 0, |
| @@ -28,7 +28,7 @@ struct ImageViewBase { | |||
| 28 | explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, | 28 | explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, |
| 29 | ImageId image_id); | 29 | ImageId image_id); |
| 30 | explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); | 30 | explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); |
| 31 | explicit ImageViewBase(const NullImageParams&); | 31 | explicit ImageViewBase(const NullImageViewParams&); |
| 32 | 32 | ||
| 33 | [[nodiscard]] bool IsBuffer() const noexcept { | 33 | [[nodiscard]] bool IsBuffer() const noexcept { |
| 34 | return type == ImageViewType::Buffer; | 34 | return type == ImageViewType::Buffer; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f70c1f764..4d2874bf2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <unordered_set> | 7 | #include <unordered_set> |
| 8 | 8 | ||
| 9 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 10 | #include "common/settings.h" | ||
| 10 | #include "video_core/dirty_flags.h" | 11 | #include "video_core/dirty_flags.h" |
| 11 | #include "video_core/engines/kepler_compute.h" | 12 | #include "video_core/engines/kepler_compute.h" |
| 12 | #include "video_core/texture_cache/image_view_base.h" | 13 | #include "video_core/texture_cache/image_view_base.h" |
| @@ -44,21 +45,22 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 44 | 45 | ||
| 45 | // Make sure the first index is reserved for the null resources | 46 | // Make sure the first index is reserved for the null resources |
| 46 | // This way the null resource becomes a compile time constant | 47 | // This way the null resource becomes a compile time constant |
| 47 | void(slot_image_views.insert(runtime, NullImageParams{})); | 48 | void(slot_images.insert(NullImageParams{})); |
| 49 | void(slot_image_views.insert(runtime, NullImageViewParams{})); | ||
| 48 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 50 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 49 | 51 | ||
| 50 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | 52 | if constexpr (HAS_DEVICE_MEMORY_INFO) { |
| 51 | const auto device_memory = runtime.GetDeviceLocalMemory(); | 53 | const auto device_memory = runtime.GetDeviceLocalMemory(); |
| 52 | const u64 possible_expected_memory = (device_memory * 3) / 10; | 54 | const u64 possible_expected_memory = (device_memory * 4) / 10; |
| 53 | const u64 possible_critical_memory = (device_memory * 6) / 10; | 55 | const u64 possible_critical_memory = (device_memory * 7) / 10; |
| 54 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); | 56 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); |
| 55 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); | 57 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); |
| 56 | minimum_memory = 0; | 58 | minimum_memory = 0; |
| 57 | } else { | 59 | } else { |
| 58 | // on OGL we can be more conservatives as the driver takes care. | 60 | // On OpenGL we can be more conservatives as the driver takes care. |
| 59 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | 61 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; |
| 60 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | 62 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; |
| 61 | minimum_memory = expected_memory; | 63 | minimum_memory = 0; |
| 62 | } | 64 | } |
| 63 | } | 65 | } |
| 64 | 66 | ||
| @@ -67,7 +69,7 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 67 | const bool high_priority_mode = total_used_memory >= expected_memory; | 69 | const bool high_priority_mode = total_used_memory >= expected_memory; |
| 68 | const bool aggressive_mode = total_used_memory >= critical_memory; | 70 | const bool aggressive_mode = total_used_memory >= critical_memory; |
| 69 | const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; | 71 | const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; |
| 70 | size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5); | 72 | size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10); |
| 71 | const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { | 73 | const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { |
| 72 | if (num_iterations == 0) { | 74 | if (num_iterations == 0) { |
| 73 | return true; | 75 | return true; |
| @@ -89,7 +91,7 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 89 | UntrackImage(image, image_id); | 91 | UntrackImage(image, image_id); |
| 90 | } | 92 | } |
| 91 | UnregisterImage(image_id); | 93 | UnregisterImage(image_id); |
| 92 | DeleteImage(image_id); | 94 | DeleteImage(image_id, image.scale_tick > frame_tick + 5); |
| 93 | return false; | 95 | return false; |
| 94 | }; | 96 | }; |
| 95 | lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); | 97 | lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); |
| @@ -103,6 +105,7 @@ void TextureCache<P>::TickFrame() { | |||
| 103 | sentenced_images.Tick(); | 105 | sentenced_images.Tick(); |
| 104 | sentenced_framebuffers.Tick(); | 106 | sentenced_framebuffers.Tick(); |
| 105 | sentenced_image_view.Tick(); | 107 | sentenced_image_view.Tick(); |
| 108 | runtime.TickFrame(); | ||
| 106 | ++frame_tick; | 109 | ++frame_tick; |
| 107 | } | 110 | } |
| 108 | 111 | ||
| @@ -122,15 +125,14 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept { | |||
| 122 | } | 125 | } |
| 123 | 126 | ||
| 124 | template <class P> | 127 | template <class P> |
| 125 | void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, | 128 | template <bool has_blacklists> |
| 126 | std::span<ImageViewId> image_view_ids) { | 129 | void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) { |
| 127 | FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); | 130 | FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views); |
| 128 | } | 131 | } |
| 129 | 132 | ||
| 130 | template <class P> | 133 | template <class P> |
| 131 | void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, | 134 | void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { |
| 132 | std::span<ImageViewId> image_view_ids) { | 135 | FillImageViews<true>(compute_image_table, compute_image_view_ids, views); |
| 133 | FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); | ||
| 134 | } | 136 | } |
| 135 | 137 | ||
| 136 | template <class P> | 138 | template <class P> |
| @@ -190,6 +192,102 @@ void TextureCache<P>::SynchronizeComputeDescriptors() { | |||
| 190 | } | 192 | } |
| 191 | 193 | ||
| 192 | template <class P> | 194 | template <class P> |
| 195 | bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { | ||
| 196 | auto& flags = maxwell3d.dirty.flags; | ||
| 197 | u32 scale_rating = 0; | ||
| 198 | bool rescaled = false; | ||
| 199 | std::array<ImageId, NUM_RT> tmp_color_images{}; | ||
| 200 | ImageId tmp_depth_image{}; | ||
| 201 | do { | ||
| 202 | flags[Dirty::RenderTargets] = false; | ||
| 203 | |||
| 204 | has_deleted_images = false; | ||
| 205 | // Render target control is used on all render targets, so force look ups when this one is | ||
| 206 | // up | ||
| 207 | const bool force = flags[Dirty::RenderTargetControl]; | ||
| 208 | flags[Dirty::RenderTargetControl] = false; | ||
| 209 | |||
| 210 | scale_rating = 0; | ||
| 211 | bool any_rescaled = false; | ||
| 212 | bool can_rescale = true; | ||
| 213 | const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) { | ||
| 214 | if (view_id != NULL_IMAGE_VIEW_ID && view_id != ImageViewId{}) { | ||
| 215 | const auto& view = slot_image_views[view_id]; | ||
| 216 | const auto image_id = view.image_id; | ||
| 217 | id_save = image_id; | ||
| 218 | auto& image = slot_images[image_id]; | ||
| 219 | can_rescale &= ImageCanRescale(image); | ||
| 220 | any_rescaled |= True(image.flags & ImageFlagBits::Rescaled) || | ||
| 221 | GetFormatType(image.info.format) != SurfaceType::ColorTexture; | ||
| 222 | scale_rating = std::max<u32>(scale_rating, image.scale_tick <= frame_tick | ||
| 223 | ? image.scale_rating + 1U | ||
| 224 | : image.scale_rating); | ||
| 225 | } else { | ||
| 226 | id_save = CORRUPT_ID; | ||
| 227 | } | ||
| 228 | }; | ||
| 229 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 230 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | ||
| 231 | if (flags[Dirty::ColorBuffer0 + index] || force) { | ||
| 232 | flags[Dirty::ColorBuffer0 + index] = false; | ||
| 233 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); | ||
| 234 | } | ||
| 235 | check_rescale(color_buffer_id, tmp_color_images[index]); | ||
| 236 | } | ||
| 237 | if (flags[Dirty::ZetaBuffer] || force) { | ||
| 238 | flags[Dirty::ZetaBuffer] = false; | ||
| 239 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); | ||
| 240 | } | ||
| 241 | check_rescale(render_targets.depth_buffer_id, tmp_depth_image); | ||
| 242 | |||
| 243 | if (can_rescale) { | ||
| 244 | rescaled = any_rescaled || scale_rating >= 2; | ||
| 245 | const auto scale_up = [this](ImageId image_id) { | ||
| 246 | if (image_id != CORRUPT_ID) { | ||
| 247 | Image& image = slot_images[image_id]; | ||
| 248 | ScaleUp(image); | ||
| 249 | } | ||
| 250 | }; | ||
| 251 | if (rescaled) { | ||
| 252 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 253 | scale_up(tmp_color_images[index]); | ||
| 254 | } | ||
| 255 | scale_up(tmp_depth_image); | ||
| 256 | scale_rating = 2; | ||
| 257 | } | ||
| 258 | } else { | ||
| 259 | rescaled = false; | ||
| 260 | const auto scale_down = [this](ImageId image_id) { | ||
| 261 | if (image_id != CORRUPT_ID) { | ||
| 262 | Image& image = slot_images[image_id]; | ||
| 263 | ScaleDown(image); | ||
| 264 | } | ||
| 265 | }; | ||
| 266 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 267 | scale_down(tmp_color_images[index]); | ||
| 268 | } | ||
| 269 | scale_down(tmp_depth_image); | ||
| 270 | scale_rating = 1; | ||
| 271 | } | ||
| 272 | } while (has_deleted_images); | ||
| 273 | const auto set_rating = [this, scale_rating](ImageId image_id) { | ||
| 274 | if (image_id != CORRUPT_ID) { | ||
| 275 | Image& image = slot_images[image_id]; | ||
| 276 | image.scale_rating = scale_rating; | ||
| 277 | if (image.scale_tick <= frame_tick) { | ||
| 278 | image.scale_tick = frame_tick + 1; | ||
| 279 | } | ||
| 280 | } | ||
| 281 | }; | ||
| 282 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 283 | set_rating(tmp_color_images[index]); | ||
| 284 | } | ||
| 285 | set_rating(tmp_depth_image); | ||
| 286 | |||
| 287 | return rescaled; | ||
| 288 | } | ||
| 289 | |||
| 290 | template <class P> | ||
| 193 | void TextureCache<P>::UpdateRenderTargets(bool is_clear) { | 291 | void TextureCache<P>::UpdateRenderTargets(bool is_clear) { |
| 194 | using namespace VideoCommon::Dirty; | 292 | using namespace VideoCommon::Dirty; |
| 195 | auto& flags = maxwell3d.dirty.flags; | 293 | auto& flags = maxwell3d.dirty.flags; |
| @@ -202,24 +300,18 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { | |||
| 202 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | 300 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); |
| 203 | return; | 301 | return; |
| 204 | } | 302 | } |
| 205 | flags[Dirty::RenderTargets] = false; | ||
| 206 | 303 | ||
| 207 | // Render target control is used on all render targets, so force look ups when this one is up | 304 | const bool rescaled = RescaleRenderTargets(is_clear); |
| 208 | const bool force = flags[Dirty::RenderTargetControl]; | 305 | if (is_rescaling != rescaled) { |
| 209 | flags[Dirty::RenderTargetControl] = false; | 306 | flags[Dirty::RescaleViewports] = true; |
| 307 | flags[Dirty::RescaleScissors] = true; | ||
| 308 | is_rescaling = rescaled; | ||
| 309 | } | ||
| 210 | 310 | ||
| 211 | for (size_t index = 0; index < NUM_RT; ++index) { | 311 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 212 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | 312 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; |
| 213 | if (flags[Dirty::ColorBuffer0 + index] || force) { | ||
| 214 | flags[Dirty::ColorBuffer0 + index] = false; | ||
| 215 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); | ||
| 216 | } | ||
| 217 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | 313 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); |
| 218 | } | 314 | } |
| 219 | if (flags[Dirty::ZetaBuffer] || force) { | ||
| 220 | flags[Dirty::ZetaBuffer] = false; | ||
| 221 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); | ||
| 222 | } | ||
| 223 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | 315 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; |
| 224 | 316 | ||
| 225 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | 317 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); |
| @@ -227,9 +319,15 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { | |||
| 227 | for (size_t index = 0; index < NUM_RT; ++index) { | 319 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 228 | render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); | 320 | render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); |
| 229 | } | 321 | } |
| 322 | u32 up_scale = 1; | ||
| 323 | u32 down_shift = 0; | ||
| 324 | if (is_rescaling) { | ||
| 325 | up_scale = Settings::values.resolution_info.up_scale; | ||
| 326 | down_shift = Settings::values.resolution_info.down_shift; | ||
| 327 | } | ||
| 230 | render_targets.size = Extent2D{ | 328 | render_targets.size = Extent2D{ |
| 231 | maxwell3d.regs.render_area.width, | 329 | (maxwell3d.regs.render_area.width * up_scale) >> down_shift, |
| 232 | maxwell3d.regs.render_area.height, | 330 | (maxwell3d.regs.render_area.height * up_scale) >> down_shift, |
| 233 | }; | 331 | }; |
| 234 | 332 | ||
| 235 | flags[Dirty::DepthBiasGlobal] = true; | 333 | flags[Dirty::DepthBiasGlobal] = true; |
| @@ -241,17 +339,28 @@ typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { | |||
| 241 | } | 339 | } |
| 242 | 340 | ||
| 243 | template <class P> | 341 | template <class P> |
| 342 | template <bool has_blacklists> | ||
| 244 | void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, | 343 | void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, |
| 245 | std::span<ImageViewId> cached_image_view_ids, | 344 | std::span<ImageViewId> cached_image_view_ids, |
| 246 | std::span<const u32> indices, | 345 | std::span<ImageViewInOut> views) { |
| 247 | std::span<ImageViewId> image_view_ids) { | 346 | bool has_blacklisted; |
| 248 | ASSERT(indices.size() <= image_view_ids.size()); | ||
| 249 | do { | 347 | do { |
| 250 | has_deleted_images = false; | 348 | has_deleted_images = false; |
| 251 | std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { | 349 | if constexpr (has_blacklists) { |
| 252 | return VisitImageView(table, cached_image_view_ids, index); | 350 | has_blacklisted = false; |
| 253 | }); | 351 | } |
| 254 | } while (has_deleted_images); | 352 | for (ImageViewInOut& view : views) { |
| 353 | view.id = VisitImageView(table, cached_image_view_ids, view.index); | ||
| 354 | if constexpr (has_blacklists) { | ||
| 355 | if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { | ||
| 356 | const ImageViewBase& image_view{slot_image_views[view.id]}; | ||
| 357 | auto& image = slot_images[image_view.image_id]; | ||
| 358 | has_blacklisted |= ScaleDown(image); | ||
| 359 | image.scale_rating = 0; | ||
| 360 | } | ||
| 361 | } | ||
| 362 | } | ||
| 363 | } while (has_deleted_images || (has_blacklists && has_blacklisted)); | ||
| 255 | } | 364 | } |
| 256 | 365 | ||
| 257 | template <class P> | 366 | template <class P> |
| @@ -369,8 +478,43 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | |||
| 369 | PrepareImage(src_id, false, false); | 478 | PrepareImage(src_id, false, false); |
| 370 | PrepareImage(dst_id, true, false); | 479 | PrepareImage(dst_id, true, false); |
| 371 | 480 | ||
| 372 | ImageBase& dst_image = slot_images[dst_id]; | 481 | Image& dst_image = slot_images[dst_id]; |
| 373 | const ImageBase& src_image = slot_images[src_id]; | 482 | Image& src_image = slot_images[src_id]; |
| 483 | bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); | ||
| 484 | bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); | ||
| 485 | |||
| 486 | const bool is_resolve = src_image.info.num_samples != 1 && dst_image.info.num_samples == 1; | ||
| 487 | if (is_src_rescaled != is_dst_rescaled) { | ||
| 488 | if (ImageCanRescale(src_image)) { | ||
| 489 | ScaleUp(src_image); | ||
| 490 | is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); | ||
| 491 | if (is_resolve) { | ||
| 492 | dst_image.info.rescaleable = true; | ||
| 493 | for (const auto& alias : dst_image.aliased_images) { | ||
| 494 | Image& other_image = slot_images[alias.id]; | ||
| 495 | other_image.info.rescaleable = true; | ||
| 496 | } | ||
| 497 | } | ||
| 498 | } | ||
| 499 | if (ImageCanRescale(dst_image)) { | ||
| 500 | ScaleUp(dst_image); | ||
| 501 | is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); | ||
| 502 | } | ||
| 503 | } | ||
| 504 | if (is_resolve && (is_src_rescaled != is_dst_rescaled)) { | ||
| 505 | // A resolve requires both images to be the same dimensions. Resize down if needed. | ||
| 506 | ScaleDown(src_image); | ||
| 507 | ScaleDown(dst_image); | ||
| 508 | is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); | ||
| 509 | is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); | ||
| 510 | } | ||
| 511 | const auto& resolution = Settings::values.resolution_info; | ||
| 512 | const auto scale_region = [&](Region2D& region) { | ||
| 513 | region.start.x = resolution.ScaleUp(region.start.x); | ||
| 514 | region.start.y = resolution.ScaleUp(region.start.y); | ||
| 515 | region.end.x = resolution.ScaleUp(region.end.x); | ||
| 516 | region.end.y = resolution.ScaleUp(region.end.y); | ||
| 517 | }; | ||
| 374 | 518 | ||
| 375 | // TODO: Deduplicate | 519 | // TODO: Deduplicate |
| 376 | const std::optional src_base = src_image.TryFindBase(src.Address()); | 520 | const std::optional src_base = src_image.TryFindBase(src.Address()); |
| @@ -378,20 +522,26 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | |||
| 378 | const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); | 522 | const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); |
| 379 | const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); | 523 | const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); |
| 380 | const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); | 524 | const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); |
| 381 | const Region2D src_region{ | 525 | Region2D src_region{ |
| 382 | Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, | 526 | Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, |
| 383 | Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, | 527 | Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, |
| 384 | }; | 528 | }; |
| 529 | if (is_src_rescaled) { | ||
| 530 | scale_region(src_region); | ||
| 531 | } | ||
| 385 | 532 | ||
| 386 | const std::optional dst_base = dst_image.TryFindBase(dst.Address()); | 533 | const std::optional dst_base = dst_image.TryFindBase(dst.Address()); |
| 387 | const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; | 534 | const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; |
| 388 | const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); | 535 | const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); |
| 389 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | 536 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); |
| 390 | const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); | 537 | const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); |
| 391 | const Region2D dst_region{ | 538 | Region2D dst_region{ |
| 392 | Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, | 539 | Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, |
| 393 | Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, | 540 | Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, |
| 394 | }; | 541 | }; |
| 542 | if (is_dst_rescaled) { | ||
| 543 | scale_region(dst_region); | ||
| 544 | } | ||
| 395 | 545 | ||
| 396 | // Always call this after src_framebuffer_id was queried, as the address might be invalidated. | 546 | // Always call this after src_framebuffer_id was queried, as the address might be invalidated. |
| 397 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; | 547 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; |
| @@ -487,6 +637,20 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 487 | } | 637 | } |
| 488 | 638 | ||
| 489 | template <class P> | 639 | template <class P> |
| 640 | bool TextureCache<P>::IsRescaling() const noexcept { | ||
| 641 | return is_rescaling; | ||
| 642 | } | ||
| 643 | |||
| 644 | template <class P> | ||
| 645 | bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcept { | ||
| 646 | if (image_view.type == ImageViewType::Buffer) { | ||
| 647 | return false; | ||
| 648 | } | ||
| 649 | const ImageBase& image = slot_images[image_view.image_id]; | ||
| 650 | return True(image.flags & ImageFlagBits::Rescaled); | ||
| 651 | } | ||
| 652 | |||
| 653 | template <class P> | ||
| 490 | bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | 654 | bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { |
| 491 | bool is_modified = false; | 655 | bool is_modified = false; |
| 492 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { | 656 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { |
| @@ -624,6 +788,105 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||
| 624 | } | 788 | } |
| 625 | 789 | ||
| 626 | template <class P> | 790 | template <class P> |
| 791 | bool TextureCache<P>::ImageCanRescale(ImageBase& image) { | ||
| 792 | if (!image.info.rescaleable) { | ||
| 793 | return false; | ||
| 794 | } | ||
| 795 | if (Settings::values.resolution_info.downscale && !image.info.downscaleable) { | ||
| 796 | return false; | ||
| 797 | } | ||
| 798 | if (True(image.flags & (ImageFlagBits::Rescaled | ImageFlagBits::CheckingRescalable))) { | ||
| 799 | return true; | ||
| 800 | } | ||
| 801 | if (True(image.flags & ImageFlagBits::IsRescalable)) { | ||
| 802 | return true; | ||
| 803 | } | ||
| 804 | image.flags |= ImageFlagBits::CheckingRescalable; | ||
| 805 | for (const auto& alias : image.aliased_images) { | ||
| 806 | Image& other_image = slot_images[alias.id]; | ||
| 807 | if (!ImageCanRescale(other_image)) { | ||
| 808 | image.flags &= ~ImageFlagBits::CheckingRescalable; | ||
| 809 | return false; | ||
| 810 | } | ||
| 811 | } | ||
| 812 | image.flags &= ~ImageFlagBits::CheckingRescalable; | ||
| 813 | image.flags |= ImageFlagBits::IsRescalable; | ||
| 814 | return true; | ||
| 815 | } | ||
| 816 | |||
| 817 | template <class P> | ||
| 818 | void TextureCache<P>::InvalidateScale(Image& image) { | ||
| 819 | if (image.scale_tick <= frame_tick) { | ||
| 820 | image.scale_tick = frame_tick + 1; | ||
| 821 | } | ||
| 822 | const std::span<const ImageViewId> image_view_ids = image.image_view_ids; | ||
| 823 | auto& dirty = maxwell3d.dirty.flags; | ||
| 824 | dirty[Dirty::RenderTargets] = true; | ||
| 825 | dirty[Dirty::ZetaBuffer] = true; | ||
| 826 | for (size_t rt = 0; rt < NUM_RT; ++rt) { | ||
| 827 | dirty[Dirty::ColorBuffer0 + rt] = true; | ||
| 828 | } | ||
| 829 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 830 | std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); | ||
| 831 | if (render_targets.depth_buffer_id == image_view_id) { | ||
| 832 | render_targets.depth_buffer_id = ImageViewId{}; | ||
| 833 | } | ||
| 834 | } | ||
| 835 | RemoveImageViewReferences(image_view_ids); | ||
| 836 | RemoveFramebuffers(image_view_ids); | ||
| 837 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 838 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | ||
| 839 | slot_image_views.erase(image_view_id); | ||
| 840 | } | ||
| 841 | image.image_view_ids.clear(); | ||
| 842 | image.image_view_infos.clear(); | ||
| 843 | if constexpr (ENABLE_VALIDATION) { | ||
| 844 | std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); | ||
| 845 | std::ranges::fill(compute_image_view_ids, CORRUPT_ID); | ||
| 846 | } | ||
| 847 | graphics_image_table.Invalidate(); | ||
| 848 | compute_image_table.Invalidate(); | ||
| 849 | has_deleted_images = true; | ||
| 850 | } | ||
| 851 | |||
| 852 | template <class P> | ||
| 853 | u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) { | ||
| 854 | const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale * | ||
| 855 | Settings::values.resolution_info.up_scale); | ||
| 856 | const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift + | ||
| 857 | Settings::values.resolution_info.down_shift); | ||
| 858 | const u64 image_size_bytes = | ||
| 859 | static_cast<u64>(std::max(image.guest_size_bytes, image.unswizzled_size_bytes)); | ||
| 860 | const u64 tentative_size = (image_size_bytes * scale_up) >> down_shift; | ||
| 861 | const u64 fitted_size = Common::AlignUp(tentative_size, 1024); | ||
| 862 | return fitted_size; | ||
| 863 | } | ||
| 864 | |||
| 865 | template <class P> | ||
| 866 | bool TextureCache<P>::ScaleUp(Image& image) { | ||
| 867 | const bool has_copy = image.HasScaled(); | ||
| 868 | const bool rescaled = image.ScaleUp(); | ||
| 869 | if (!rescaled) { | ||
| 870 | return false; | ||
| 871 | } | ||
| 872 | if (!has_copy) { | ||
| 873 | total_used_memory += GetScaledImageSizeBytes(image); | ||
| 874 | } | ||
| 875 | InvalidateScale(image); | ||
| 876 | return true; | ||
| 877 | } | ||
| 878 | |||
| 879 | template <class P> | ||
| 880 | bool TextureCache<P>::ScaleDown(Image& image) { | ||
| 881 | const bool rescaled = image.ScaleDown(); | ||
| 882 | if (!rescaled) { | ||
| 883 | return false; | ||
| 884 | } | ||
| 885 | InvalidateScale(image); | ||
| 886 | return true; | ||
| 887 | } | ||
| 888 | |||
| 889 | template <class P> | ||
| 627 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | 890 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 628 | RelaxedOptions options) { | 891 | RelaxedOptions options) { |
| 629 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 892 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| @@ -660,12 +923,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 660 | std::vector<ImageId> right_aliased_ids; | 923 | std::vector<ImageId> right_aliased_ids; |
| 661 | std::unordered_set<ImageId> ignore_textures; | 924 | std::unordered_set<ImageId> ignore_textures; |
| 662 | std::vector<ImageId> bad_overlap_ids; | 925 | std::vector<ImageId> bad_overlap_ids; |
| 926 | std::vector<ImageId> all_siblings; | ||
| 927 | const bool this_is_linear = info.type == ImageType::Linear; | ||
| 663 | const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { | 928 | const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { |
| 664 | if (True(overlap.flags & ImageFlagBits::Remapped)) { | 929 | if (True(overlap.flags & ImageFlagBits::Remapped)) { |
| 665 | ignore_textures.insert(overlap_id); | 930 | ignore_textures.insert(overlap_id); |
| 666 | return; | 931 | return; |
| 667 | } | 932 | } |
| 668 | if (info.type == ImageType::Linear) { | 933 | const bool overlap_is_linear = overlap.info.type == ImageType::Linear; |
| 934 | if (this_is_linear != overlap_is_linear) { | ||
| 935 | return; | ||
| 936 | } | ||
| 937 | if (this_is_linear && overlap_is_linear) { | ||
| 669 | if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { | 938 | if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { |
| 670 | // Alias linear images with the same pitch | 939 | // Alias linear images with the same pitch |
| 671 | left_aliased_ids.push_back(overlap_id); | 940 | left_aliased_ids.push_back(overlap_id); |
| @@ -681,6 +950,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 681 | cpu_addr = solution->cpu_addr; | 950 | cpu_addr = solution->cpu_addr; |
| 682 | new_info.resources = solution->resources; | 951 | new_info.resources = solution->resources; |
| 683 | overlap_ids.push_back(overlap_id); | 952 | overlap_ids.push_back(overlap_id); |
| 953 | all_siblings.push_back(overlap_id); | ||
| 684 | return; | 954 | return; |
| 685 | } | 955 | } |
| 686 | static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; | 956 | static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; |
| @@ -688,10 +958,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 688 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { | 958 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { |
| 689 | left_aliased_ids.push_back(overlap_id); | 959 | left_aliased_ids.push_back(overlap_id); |
| 690 | overlap.flags |= ImageFlagBits::Alias; | 960 | overlap.flags |= ImageFlagBits::Alias; |
| 961 | all_siblings.push_back(overlap_id); | ||
| 691 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, | 962 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, |
| 692 | broken_views, native_bgr)) { | 963 | broken_views, native_bgr)) { |
| 693 | right_aliased_ids.push_back(overlap_id); | 964 | right_aliased_ids.push_back(overlap_id); |
| 694 | overlap.flags |= ImageFlagBits::Alias; | 965 | overlap.flags |= ImageFlagBits::Alias; |
| 966 | all_siblings.push_back(overlap_id); | ||
| 695 | } else { | 967 | } else { |
| 696 | bad_overlap_ids.push_back(overlap_id); | 968 | bad_overlap_ids.push_back(overlap_id); |
| 697 | overlap.flags |= ImageFlagBits::BadOverlap; | 969 | overlap.flags |= ImageFlagBits::BadOverlap; |
| @@ -709,6 +981,32 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 709 | } | 981 | } |
| 710 | }; | 982 | }; |
| 711 | ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); | 983 | ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); |
| 984 | |||
| 985 | bool can_rescale = info.rescaleable; | ||
| 986 | bool any_rescaled = false; | ||
| 987 | for (const ImageId sibling_id : all_siblings) { | ||
| 988 | if (!can_rescale) { | ||
| 989 | break; | ||
| 990 | } | ||
| 991 | Image& sibling = slot_images[sibling_id]; | ||
| 992 | can_rescale &= ImageCanRescale(sibling); | ||
| 993 | any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); | ||
| 994 | } | ||
| 995 | |||
| 996 | can_rescale &= any_rescaled; | ||
| 997 | |||
| 998 | if (can_rescale) { | ||
| 999 | for (const ImageId sibling_id : all_siblings) { | ||
| 1000 | Image& sibling = slot_images[sibling_id]; | ||
| 1001 | ScaleUp(sibling); | ||
| 1002 | } | ||
| 1003 | } else { | ||
| 1004 | for (const ImageId sibling_id : all_siblings) { | ||
| 1005 | Image& sibling = slot_images[sibling_id]; | ||
| 1006 | ScaleDown(sibling); | ||
| 1007 | } | ||
| 1008 | } | ||
| 1009 | |||
| 712 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | 1010 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); |
| 713 | Image& new_image = slot_images[new_image_id]; | 1011 | Image& new_image = slot_images[new_image_id]; |
| 714 | 1012 | ||
| @@ -731,14 +1029,23 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 731 | // TODO: Only upload what we need | 1029 | // TODO: Only upload what we need |
| 732 | RefreshContents(new_image, new_image_id); | 1030 | RefreshContents(new_image, new_image_id); |
| 733 | 1031 | ||
| 1032 | if (can_rescale) { | ||
| 1033 | ScaleUp(new_image); | ||
| 1034 | } else { | ||
| 1035 | ScaleDown(new_image); | ||
| 1036 | } | ||
| 1037 | |||
| 734 | for (const ImageId overlap_id : overlap_ids) { | 1038 | for (const ImageId overlap_id : overlap_ids) { |
| 735 | Image& overlap = slot_images[overlap_id]; | 1039 | Image& overlap = slot_images[overlap_id]; |
| 736 | if (overlap.info.num_samples != new_image.info.num_samples) { | 1040 | if (overlap.info.num_samples != new_image.info.num_samples) { |
| 737 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | 1041 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); |
| 738 | } else { | 1042 | } else { |
| 1043 | const auto& resolution = Settings::values.resolution_info; | ||
| 739 | const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); | 1044 | const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); |
| 740 | const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); | 1045 | const u32 up_scale = can_rescale ? resolution.up_scale : 1; |
| 741 | runtime.CopyImage(new_image, overlap, copies); | 1046 | const u32 down_shift = can_rescale ? resolution.down_shift : 0; |
| 1047 | auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); | ||
| 1048 | runtime.CopyImage(new_image, overlap, std::move(copies)); | ||
| 742 | } | 1049 | } |
| 743 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | 1050 | if (True(overlap.flags & ImageFlagBits::Tracked)) { |
| 744 | UntrackImage(overlap, overlap_id); | 1051 | UntrackImage(overlap, overlap_id); |
| @@ -1083,13 +1390,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 1083 | "Trying to unregister an already registered image"); | 1390 | "Trying to unregister an already registered image"); |
| 1084 | image.flags &= ~ImageFlagBits::Registered; | 1391 | image.flags &= ~ImageFlagBits::Registered; |
| 1085 | image.flags &= ~ImageFlagBits::BadOverlap; | 1392 | image.flags &= ~ImageFlagBits::BadOverlap; |
| 1086 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1087 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1088 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1089 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1090 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1091 | } | ||
| 1092 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | ||
| 1093 | lru_cache.Free(image.lru_index); | 1393 | lru_cache.Free(image.lru_index); |
| 1094 | const auto& clear_page_table = | 1394 | const auto& clear_page_table = |
| 1095 | [this, image_id]( | 1395 | [this, image_id]( |
| @@ -1213,8 +1513,18 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | |||
| 1213 | } | 1513 | } |
| 1214 | 1514 | ||
| 1215 | template <class P> | 1515 | template <class P> |
| 1216 | void TextureCache<P>::DeleteImage(ImageId image_id) { | 1516 | void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) { |
| 1217 | ImageBase& image = slot_images[image_id]; | 1517 | ImageBase& image = slot_images[image_id]; |
| 1518 | if (image.HasScaled()) { | ||
| 1519 | total_used_memory -= GetScaledImageSizeBytes(image); | ||
| 1520 | } | ||
| 1521 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1522 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1523 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1524 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1525 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1526 | } | ||
| 1527 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | ||
| 1218 | const GPUVAddr gpu_addr = image.gpu_addr; | 1528 | const GPUVAddr gpu_addr = image.gpu_addr; |
| 1219 | const auto alloc_it = image_allocs_table.find(gpu_addr); | 1529 | const auto alloc_it = image_allocs_table.find(gpu_addr); |
| 1220 | if (alloc_it == image_allocs_table.end()) { | 1530 | if (alloc_it == image_allocs_table.end()) { |
| @@ -1269,10 +1579,14 @@ void TextureCache<P>::DeleteImage(ImageId image_id) { | |||
| 1269 | num_removed_overlaps); | 1579 | num_removed_overlaps); |
| 1270 | } | 1580 | } |
| 1271 | for (const ImageViewId image_view_id : image_view_ids) { | 1581 | for (const ImageViewId image_view_id : image_view_ids) { |
| 1272 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | 1582 | if (!immediate_delete) { |
| 1583 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | ||
| 1584 | } | ||
| 1273 | slot_image_views.erase(image_view_id); | 1585 | slot_image_views.erase(image_view_id); |
| 1274 | } | 1586 | } |
| 1275 | sentenced_images.Push(std::move(slot_images[image_id])); | 1587 | if (!immediate_delete) { |
| 1588 | sentenced_images.Push(std::move(slot_images[image_id])); | ||
| 1589 | } | ||
| 1276 | slot_images.erase(image_id); | 1590 | slot_images.erase(image_id); |
| 1277 | 1591 | ||
| 1278 | alloc_images.erase(alloc_image_it); | 1592 | alloc_images.erase(alloc_image_it); |
| @@ -1306,6 +1620,9 @@ void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_vi | |||
| 1306 | auto it = framebuffers.begin(); | 1620 | auto it = framebuffers.begin(); |
| 1307 | while (it != framebuffers.end()) { | 1621 | while (it != framebuffers.end()) { |
| 1308 | if (it->first.Contains(removed_views)) { | 1622 | if (it->first.Contains(removed_views)) { |
| 1623 | auto framebuffer_id = it->second; | ||
| 1624 | ASSERT(framebuffer_id); | ||
| 1625 | sentenced_framebuffers.Push(std::move(slot_framebuffers[framebuffer_id])); | ||
| 1309 | it = framebuffers.erase(it); | 1626 | it = framebuffers.erase(it); |
| 1310 | } else { | 1627 | } else { |
| 1311 | ++it; | 1628 | ++it; |
| @@ -1322,26 +1639,60 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept { | |||
| 1322 | template <class P> | 1639 | template <class P> |
| 1323 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | 1640 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { |
| 1324 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | 1641 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; |
| 1325 | ImageBase& image = slot_images[image_id]; | 1642 | Image& image = slot_images[image_id]; |
| 1643 | bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); | ||
| 1326 | u64 most_recent_tick = image.modification_tick; | 1644 | u64 most_recent_tick = image.modification_tick; |
| 1327 | for (const AliasedImage& aliased : image.aliased_images) { | 1645 | for (const AliasedImage& aliased : image.aliased_images) { |
| 1328 | ImageBase& aliased_image = slot_images[aliased.id]; | 1646 | ImageBase& aliased_image = slot_images[aliased.id]; |
| 1329 | if (image.modification_tick < aliased_image.modification_tick) { | 1647 | if (image.modification_tick < aliased_image.modification_tick) { |
| 1330 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | 1648 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); |
| 1331 | aliased_images.push_back(&aliased); | 1649 | aliased_images.push_back(&aliased); |
| 1650 | any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); | ||
| 1332 | } | 1651 | } |
| 1333 | } | 1652 | } |
| 1334 | if (aliased_images.empty()) { | 1653 | if (aliased_images.empty()) { |
| 1335 | return; | 1654 | return; |
| 1336 | } | 1655 | } |
| 1656 | const bool can_rescale = ImageCanRescale(image); | ||
| 1657 | if (any_rescaled) { | ||
| 1658 | if (can_rescale) { | ||
| 1659 | ScaleUp(image); | ||
| 1660 | } else { | ||
| 1661 | ScaleDown(image); | ||
| 1662 | } | ||
| 1663 | } | ||
| 1337 | image.modification_tick = most_recent_tick; | 1664 | image.modification_tick = most_recent_tick; |
| 1338 | std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { | 1665 | std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { |
| 1339 | const ImageBase& lhs_image = slot_images[lhs->id]; | 1666 | const ImageBase& lhs_image = slot_images[lhs->id]; |
| 1340 | const ImageBase& rhs_image = slot_images[rhs->id]; | 1667 | const ImageBase& rhs_image = slot_images[rhs->id]; |
| 1341 | return lhs_image.modification_tick < rhs_image.modification_tick; | 1668 | return lhs_image.modification_tick < rhs_image.modification_tick; |
| 1342 | }); | 1669 | }); |
| 1670 | const auto& resolution = Settings::values.resolution_info; | ||
| 1343 | for (const AliasedImage* const aliased : aliased_images) { | 1671 | for (const AliasedImage* const aliased : aliased_images) { |
| 1344 | CopyImage(image_id, aliased->id, aliased->copies); | 1672 | if (!resolution.active | !any_rescaled) { |
| 1673 | CopyImage(image_id, aliased->id, aliased->copies); | ||
| 1674 | continue; | ||
| 1675 | } | ||
| 1676 | Image& aliased_image = slot_images[aliased->id]; | ||
| 1677 | if (!can_rescale) { | ||
| 1678 | ScaleDown(aliased_image); | ||
| 1679 | CopyImage(image_id, aliased->id, aliased->copies); | ||
| 1680 | continue; | ||
| 1681 | } | ||
| 1682 | ScaleUp(aliased_image); | ||
| 1683 | |||
| 1684 | const bool both_2d{image.info.type == ImageType::e2D && | ||
| 1685 | aliased_image.info.type == ImageType::e2D}; | ||
| 1686 | auto copies = aliased->copies; | ||
| 1687 | for (auto copy : copies) { | ||
| 1688 | copy.extent.width = std::max<u32>( | ||
| 1689 | (copy.extent.width * resolution.up_scale) >> resolution.down_shift, 1); | ||
| 1690 | if (both_2d) { | ||
| 1691 | copy.extent.height = std::max<u32>( | ||
| 1692 | (copy.extent.height * resolution.up_scale) >> resolution.down_shift, 1); | ||
| 1693 | } | ||
| 1694 | } | ||
| 1695 | CopyImage(image_id, aliased->id, copies); | ||
| 1345 | } | 1696 | } |
| 1346 | } | 1697 | } |
| 1347 | 1698 | ||
| @@ -1377,9 +1728,25 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi | |||
| 1377 | } | 1728 | } |
| 1378 | 1729 | ||
| 1379 | template <class P> | 1730 | template <class P> |
| 1380 | void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { | 1731 | void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) { |
| 1381 | Image& dst = slot_images[dst_id]; | 1732 | Image& dst = slot_images[dst_id]; |
| 1382 | Image& src = slot_images[src_id]; | 1733 | Image& src = slot_images[src_id]; |
| 1734 | const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled); | ||
| 1735 | if (is_rescaled) { | ||
| 1736 | ASSERT(True(dst.flags & ImageFlagBits::Rescaled)); | ||
| 1737 | const bool both_2d{src.info.type == ImageType::e2D && dst.info.type == ImageType::e2D}; | ||
| 1738 | const auto& resolution = Settings::values.resolution_info; | ||
| 1739 | for (auto& copy : copies) { | ||
| 1740 | copy.src_offset.x = resolution.ScaleUp(copy.src_offset.x); | ||
| 1741 | copy.dst_offset.x = resolution.ScaleUp(copy.dst_offset.x); | ||
| 1742 | copy.extent.width = resolution.ScaleUp(copy.extent.width); | ||
| 1743 | if (both_2d) { | ||
| 1744 | copy.src_offset.y = resolution.ScaleUp(copy.src_offset.y); | ||
| 1745 | copy.dst_offset.y = resolution.ScaleUp(copy.dst_offset.y); | ||
| 1746 | copy.extent.height = resolution.ScaleUp(copy.extent.height); | ||
| 1747 | } | ||
| 1748 | } | ||
| 1749 | } | ||
| 1383 | const auto dst_format_type = GetFormatType(dst.info.format); | 1750 | const auto dst_format_type = GetFormatType(dst.info.format); |
| 1384 | const auto src_format_type = GetFormatType(src.info.format); | 1751 | const auto src_format_type = GetFormatType(src.info.format); |
| 1385 | if (src_format_type == dst_format_type) { | 1752 | if (src_format_type == dst_format_type) { |
| @@ -1424,7 +1791,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const | |||
| 1424 | }; | 1791 | }; |
| 1425 | UNIMPLEMENTED_IF(copy.extent != expected_size); | 1792 | UNIMPLEMENTED_IF(copy.extent != expected_size); |
| 1426 | 1793 | ||
| 1427 | runtime.ConvertImage(dst_framebuffer, dst_view, src_view); | 1794 | runtime.ConvertImage(dst_framebuffer, dst_view, src_view, is_rescaled); |
| 1428 | } | 1795 | } |
| 1429 | } | 1796 | } |
| 1430 | 1797 | ||
| @@ -1433,8 +1800,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) | |||
| 1433 | if (*old_id == new_id) { | 1800 | if (*old_id == new_id) { |
| 1434 | return; | 1801 | return; |
| 1435 | } | 1802 | } |
| 1436 | if (*old_id) { | 1803 | if (new_id) { |
| 1437 | const ImageViewBase& old_view = slot_image_views[*old_id]; | 1804 | const ImageViewBase& old_view = slot_image_views[new_id]; |
| 1438 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { | 1805 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { |
| 1439 | uncommitted_downloads.push_back(old_view.image_id); | 1806 | uncommitted_downloads.push_back(old_view.image_id); |
| 1440 | } | 1807 | } |
| @@ -1447,10 +1814,18 @@ std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( | |||
| 1447 | ImageId image_id, const ImageViewInfo& view_info) { | 1814 | ImageId image_id, const ImageViewInfo& view_info) { |
| 1448 | const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); | 1815 | const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); |
| 1449 | const ImageBase& image = slot_images[image_id]; | 1816 | const ImageBase& image = slot_images[image_id]; |
| 1817 | const bool is_rescaled = True(image.flags & ImageFlagBits::Rescaled); | ||
| 1450 | const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; | 1818 | const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; |
| 1451 | const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; | 1819 | const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; |
| 1452 | const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; | 1820 | const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; |
| 1453 | const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); | 1821 | Extent3D extent = MipSize(image.info.size, view_info.range.base.level); |
| 1822 | if (is_rescaled) { | ||
| 1823 | const auto& resolution = Settings::values.resolution_info; | ||
| 1824 | extent.width = resolution.ScaleUp(extent.width); | ||
| 1825 | if (image.info.type == ImageType::e2D) { | ||
| 1826 | extent.height = resolution.ScaleUp(extent.height); | ||
| 1827 | } | ||
| 1828 | } | ||
| 1454 | const u32 num_samples = image.info.num_samples; | 1829 | const u32 num_samples = image.info.num_samples; |
| 1455 | const auto [samples_x, samples_y] = SamplesLog2(num_samples); | 1830 | const auto [samples_x, samples_y] = SamplesLog2(num_samples); |
| 1456 | const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ | 1831 | const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 2d1893c1c..643ad811c 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "video_core/texture_cache/descriptor_table.h" | 21 | #include "video_core/texture_cache/descriptor_table.h" |
| 22 | #include "video_core/texture_cache/image_base.h" | 22 | #include "video_core/texture_cache/image_base.h" |
| 23 | #include "video_core/texture_cache/image_info.h" | 23 | #include "video_core/texture_cache/image_info.h" |
| 24 | #include "video_core/texture_cache/image_view_base.h" | ||
| 24 | #include "video_core/texture_cache/image_view_info.h" | 25 | #include "video_core/texture_cache/image_view_info.h" |
| 25 | #include "video_core/texture_cache/render_targets.h" | 26 | #include "video_core/texture_cache/render_targets.h" |
| 26 | #include "video_core/texture_cache/slot_vector.h" | 27 | #include "video_core/texture_cache/slot_vector.h" |
| @@ -39,6 +40,12 @@ using VideoCore::Surface::PixelFormatFromDepthFormat; | |||
| 39 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | 40 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
| 40 | using namespace Common::Literals; | 41 | using namespace Common::Literals; |
| 41 | 42 | ||
| 43 | struct ImageViewInOut { | ||
| 44 | u32 index{}; | ||
| 45 | bool blacklist{}; | ||
| 46 | ImageViewId id{}; | ||
| 47 | }; | ||
| 48 | |||
| 42 | template <class P> | 49 | template <class P> |
| 43 | class TextureCache { | 50 | class TextureCache { |
| 44 | /// Address shift for caching images into a hash table | 51 | /// Address shift for caching images into a hash table |
| @@ -53,11 +60,6 @@ class TextureCache { | |||
| 53 | /// True when the API can provide info about the memory of the device. | 60 | /// True when the API can provide info about the memory of the device. |
| 54 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | 61 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; |
| 55 | 62 | ||
| 56 | /// Image view ID for null descriptors | ||
| 57 | static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; | ||
| 58 | /// Sampler ID for bugged sampler ids | ||
| 59 | static constexpr SamplerId NULL_SAMPLER_ID{0}; | ||
| 60 | |||
| 61 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; | 63 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; |
| 62 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; | 64 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; |
| 63 | 65 | ||
| @@ -99,11 +101,11 @@ public: | |||
| 99 | void MarkModification(ImageId id) noexcept; | 101 | void MarkModification(ImageId id) noexcept; |
| 100 | 102 | ||
| 101 | /// Fill image_view_ids with the graphics images in indices | 103 | /// Fill image_view_ids with the graphics images in indices |
| 102 | void FillGraphicsImageViews(std::span<const u32> indices, | 104 | template <bool has_blacklists> |
| 103 | std::span<ImageViewId> image_view_ids); | 105 | void FillGraphicsImageViews(std::span<ImageViewInOut> views); |
| 104 | 106 | ||
| 105 | /// Fill image_view_ids with the compute images in indices | 107 | /// Fill image_view_ids with the compute images in indices |
| 106 | void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); | 108 | void FillComputeImageViews(std::span<ImageViewInOut> views); |
| 107 | 109 | ||
| 108 | /// Get the sampler from the graphics descriptor table in the specified index | 110 | /// Get the sampler from the graphics descriptor table in the specified index |
| 109 | Sampler* GetGraphicsSampler(u32 index); | 111 | Sampler* GetGraphicsSampler(u32 index); |
| @@ -117,6 +119,11 @@ public: | |||
| 117 | /// Refresh the state for compute image view and sampler descriptors | 119 | /// Refresh the state for compute image view and sampler descriptors |
| 118 | void SynchronizeComputeDescriptors(); | 120 | void SynchronizeComputeDescriptors(); |
| 119 | 121 | ||
| 122 | /// Updates the Render Targets if they can be rescaled | ||
| 123 | /// @param is_clear True when the render targets are being used for clears | ||
| 124 | /// @retval True if the Render Targets have been rescaled. | ||
| 125 | bool RescaleRenderTargets(bool is_clear); | ||
| 126 | |||
| 120 | /// Update bound render targets and upload memory if necessary | 127 | /// Update bound render targets and upload memory if necessary |
| 121 | /// @param is_clear True when the render targets are being used for clears | 128 | /// @param is_clear True when the render targets are being used for clears |
| 122 | void UpdateRenderTargets(bool is_clear); | 129 | void UpdateRenderTargets(bool is_clear); |
| @@ -160,6 +167,10 @@ public: | |||
| 160 | /// Return true when a CPU region is modified from the GPU | 167 | /// Return true when a CPU region is modified from the GPU |
| 161 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 168 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
| 162 | 169 | ||
| 170 | [[nodiscard]] bool IsRescaling() const noexcept; | ||
| 171 | |||
| 172 | [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; | ||
| 173 | |||
| 163 | std::mutex mutex; | 174 | std::mutex mutex; |
| 164 | 175 | ||
| 165 | private: | 176 | private: |
| @@ -198,9 +209,10 @@ private: | |||
| 198 | void RunGarbageCollector(); | 209 | void RunGarbageCollector(); |
| 199 | 210 | ||
| 200 | /// Fills image_view_ids in the image views in indices | 211 | /// Fills image_view_ids in the image views in indices |
| 212 | template <bool has_blacklists> | ||
| 201 | void FillImageViews(DescriptorTable<TICEntry>& table, | 213 | void FillImageViews(DescriptorTable<TICEntry>& table, |
| 202 | std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, | 214 | std::span<ImageViewId> cached_image_view_ids, |
| 203 | std::span<ImageViewId> image_view_ids); | 215 | std::span<ImageViewInOut> views); |
| 204 | 216 | ||
| 205 | /// Find or create an image view in the guest descriptor table | 217 | /// Find or create an image view in the guest descriptor table |
| 206 | ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, | 218 | ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, |
| @@ -285,7 +297,7 @@ private: | |||
| 285 | void UntrackImage(ImageBase& image, ImageId image_id); | 297 | void UntrackImage(ImageBase& image, ImageId image_id); |
| 286 | 298 | ||
| 287 | /// Delete image from the cache | 299 | /// Delete image from the cache |
| 288 | void DeleteImage(ImageId image); | 300 | void DeleteImage(ImageId image, bool immediate_delete = false); |
| 289 | 301 | ||
| 290 | /// Remove image views references from the cache | 302 | /// Remove image views references from the cache |
| 291 | void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); | 303 | void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); |
| @@ -306,7 +318,7 @@ private: | |||
| 306 | void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); | 318 | void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); |
| 307 | 319 | ||
| 308 | /// Execute copies from one image to the other, even if they are incompatible | 320 | /// Execute copies from one image to the other, even if they are incompatible |
| 309 | void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); | 321 | void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies); |
| 310 | 322 | ||
| 311 | /// Bind an image view as render target, downloading resources preemtively if needed | 323 | /// Bind an image view as render target, downloading resources preemtively if needed |
| 312 | void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); | 324 | void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); |
| @@ -318,6 +330,12 @@ private: | |||
| 318 | /// Returns true if the current clear parameters clear the whole image of a given image view | 330 | /// Returns true if the current clear parameters clear the whole image of a given image view |
| 319 | [[nodiscard]] bool IsFullClear(ImageViewId id); | 331 | [[nodiscard]] bool IsFullClear(ImageViewId id); |
| 320 | 332 | ||
| 333 | bool ImageCanRescale(ImageBase& image); | ||
| 334 | void InvalidateScale(Image& image); | ||
| 335 | bool ScaleUp(Image& image); | ||
| 336 | bool ScaleDown(Image& image); | ||
| 337 | u64 GetScaledImageSizeBytes(ImageBase& image); | ||
| 338 | |||
| 321 | Runtime& runtime; | 339 | Runtime& runtime; |
| 322 | VideoCore::RasterizerInterface& rasterizer; | 340 | VideoCore::RasterizerInterface& rasterizer; |
| 323 | Tegra::Engines::Maxwell3D& maxwell3d; | 341 | Tegra::Engines::Maxwell3D& maxwell3d; |
| @@ -349,6 +367,7 @@ private: | |||
| 349 | VAddr virtual_invalid_space{}; | 367 | VAddr virtual_invalid_space{}; |
| 350 | 368 | ||
| 351 | bool has_deleted_images = false; | 369 | bool has_deleted_images = false; |
| 370 | bool is_rescaling = false; | ||
| 352 | u64 total_used_memory = 0; | 371 | u64 total_used_memory = 0; |
| 353 | u64 minimum_memory; | 372 | u64 minimum_memory; |
| 354 | u64 expected_memory; | 373 | u64 expected_memory; |
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 47a11cb2f..5c274abdf 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h | |||
| @@ -22,6 +22,13 @@ using ImageAllocId = SlotId; | |||
| 22 | using SamplerId = SlotId; | 22 | using SamplerId = SlotId; |
| 23 | using FramebufferId = SlotId; | 23 | using FramebufferId = SlotId; |
| 24 | 24 | ||
| 25 | /// Fake image ID for null image views | ||
| 26 | constexpr ImageId NULL_IMAGE_ID{0}; | ||
| 27 | /// Image view ID for null descriptors | ||
| 28 | constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; | ||
| 29 | /// Sampler ID for bugged sampler ids | ||
| 30 | constexpr SamplerId NULL_SAMPLER_ID{0}; | ||
| 31 | |||
| 25 | enum class ImageType : u32 { | 32 | enum class ImageType : u32 { |
| 26 | e1D, | 33 | e1D, |
| 27 | e2D, | 34 | e2D, |
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 59cf2f561..ddc9fb13a 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -723,7 +723,7 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { | |||
| 723 | } | 723 | } |
| 724 | 724 | ||
| 725 | std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, | 725 | std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, |
| 726 | SubresourceBase base) { | 726 | SubresourceBase base, u32 up_scale, u32 down_shift) { |
| 727 | ASSERT(dst.resources.levels >= src.resources.levels); | 727 | ASSERT(dst.resources.levels >= src.resources.levels); |
| 728 | ASSERT(dst.num_samples == src.num_samples); | 728 | ASSERT(dst.num_samples == src.num_samples); |
| 729 | 729 | ||
| @@ -732,7 +732,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 732 | ASSERT(src.type == ImageType::e3D); | 732 | ASSERT(src.type == ImageType::e3D); |
| 733 | ASSERT(src.resources.levels == 1); | 733 | ASSERT(src.resources.levels == 1); |
| 734 | } | 734 | } |
| 735 | 735 | const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; | |
| 736 | std::vector<ImageCopy> copies; | 736 | std::vector<ImageCopy> copies; |
| 737 | copies.reserve(src.resources.levels); | 737 | copies.reserve(src.resources.levels); |
| 738 | for (s32 level = 0; level < src.resources.levels; ++level) { | 738 | for (s32 level = 0; level < src.resources.levels; ++level) { |
| @@ -762,6 +762,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 762 | if (is_dst_3d) { | 762 | if (is_dst_3d) { |
| 763 | copy.extent.depth = src.size.depth; | 763 | copy.extent.depth = src.size.depth; |
| 764 | } | 764 | } |
| 765 | copy.extent.width = std::max<u32>((copy.extent.width * up_scale) >> down_shift, 1); | ||
| 766 | if (both_2d) { | ||
| 767 | copy.extent.height = std::max<u32>((copy.extent.height * up_scale) >> down_shift, 1); | ||
| 768 | } | ||
| 765 | } | 769 | } |
| 766 | return copies; | 770 | return copies; |
| 767 | } | 771 | } |
| @@ -1153,10 +1157,10 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* | |||
| 1153 | if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { | 1157 | if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { |
| 1154 | dst_info.format = dst->info.format; | 1158 | dst_info.format = dst->info.format; |
| 1155 | } | 1159 | } |
| 1156 | if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { | 1160 | if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { |
| 1157 | dst_info.format = src->info.format; | 1161 | dst_info.format = src->info.format; |
| 1158 | } | 1162 | } |
| 1159 | if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { | 1163 | if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { |
| 1160 | src_info.format = dst->info.format; | 1164 | src_info.format = dst->info.format; |
| 1161 | } | 1165 | } |
| 1162 | } | 1166 | } |
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 766502908..7af52de2e 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h | |||
| @@ -55,7 +55,8 @@ struct OverlapResult { | |||
| 55 | 55 | ||
| 56 | [[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, | 56 | [[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, |
| 57 | const ImageInfo& src, | 57 | const ImageInfo& src, |
| 58 | SubresourceBase base); | 58 | SubresourceBase base, u32 up_scale = 1, |
| 59 | u32 down_shift = 0); | ||
| 59 | 60 | ||
| 60 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | 61 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); |
| 61 | 62 | ||
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index a552543ed..06954963d 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp | |||
| @@ -51,22 +51,6 @@ constexpr std::array<float, 256> SRGB_CONVERSION_LUT = { | |||
| 51 | 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f, | 51 | 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f, |
| 52 | }; | 52 | }; |
| 53 | 53 | ||
| 54 | unsigned SettingsMinimumAnisotropy() noexcept { | ||
| 55 | switch (static_cast<Anisotropy>(Settings::values.max_anisotropy.GetValue())) { | ||
| 56 | default: | ||
| 57 | case Anisotropy::Default: | ||
| 58 | return 1U; | ||
| 59 | case Anisotropy::Filter2x: | ||
| 60 | return 2U; | ||
| 61 | case Anisotropy::Filter4x: | ||
| 62 | return 4U; | ||
| 63 | case Anisotropy::Filter8x: | ||
| 64 | return 8U; | ||
| 65 | case Anisotropy::Filter16x: | ||
| 66 | return 16U; | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | } // Anonymous namespace | 54 | } // Anonymous namespace |
| 71 | 55 | ||
| 72 | std::array<float, 4> TSCEntry::BorderColor() const noexcept { | 56 | std::array<float, 4> TSCEntry::BorderColor() const noexcept { |
| @@ -78,7 +62,18 @@ std::array<float, 4> TSCEntry::BorderColor() const noexcept { | |||
| 78 | } | 62 | } |
| 79 | 63 | ||
| 80 | float TSCEntry::MaxAnisotropy() const noexcept { | 64 | float TSCEntry::MaxAnisotropy() const noexcept { |
| 81 | return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); | 65 | if (max_anisotropy == 0 && mipmap_filter != TextureMipmapFilter::Linear) { |
| 66 | return 1.0f; | ||
| 67 | } | ||
| 68 | const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue(); | ||
| 69 | u32 added_anisotropic{}; | ||
| 70 | if (anisotropic_settings == 0) { | ||
| 71 | added_anisotropic = Settings::values.resolution_info.up_scale >> | ||
| 72 | Settings::values.resolution_info.down_shift; | ||
| 73 | } else { | ||
| 74 | added_anisotropic = Settings::values.max_anisotropy.GetValue() - 1U; | ||
| 75 | } | ||
| 76 | return static_cast<float>(1U << (max_anisotropy + added_anisotropic)); | ||
| 82 | } | 77 | } |
| 83 | 78 | ||
| 84 | } // namespace Tegra::Texture | 79 | } // namespace Tegra::Texture |
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index cae543a51..e852c817e 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -37,6 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( | |||
| 37 | namespace VideoCore { | 37 | namespace VideoCore { |
| 38 | 38 | ||
| 39 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { | 39 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { |
| 40 | Settings::UpdateRescalingInfo(); | ||
| 41 | |||
| 40 | const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); | 42 | const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); |
| 41 | const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off; | 43 | const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off; |
| 42 | const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | 44 | const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); |
| @@ -53,11 +55,10 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor | |||
| 53 | } | 55 | } |
| 54 | } | 56 | } |
| 55 | 57 | ||
| 56 | u16 GetResolutionScaleFactor(const RendererBase& renderer) { | 58 | float GetResolutionScaleFactor(const RendererBase& renderer) { |
| 57 | return static_cast<u16>( | 59 | return Settings::values.resolution_info.active |
| 58 | Settings::values.resolution_factor.GetValue() != 0 | 60 | ? Settings::values.resolution_info.up_factor |
| 59 | ? Settings::values.resolution_factor.GetValue() | 61 | : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio(); |
| 60 | : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()); | ||
| 61 | } | 62 | } |
| 62 | 63 | ||
| 63 | } // namespace VideoCore | 64 | } // namespace VideoCore |
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index f5c27125d..f86877e86 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h | |||
| @@ -25,6 +25,6 @@ class RendererBase; | |||
| 25 | /// Creates an emulated GPU instance using the given system context. | 25 | /// Creates an emulated GPU instance using the given system context. |
| 26 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); | 26 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); |
| 27 | 27 | ||
| 28 | u16 GetResolutionScaleFactor(const RendererBase& renderer); | 28 | float GetResolutionScaleFactor(const RendererBase& renderer); |
| 29 | 29 | ||
| 30 | } // namespace VideoCore | 30 | } // namespace VideoCore |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 2d5daf6cd..10653ac6b 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -40,6 +40,10 @@ public: | |||
| 40 | VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | 40 | VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, |
| 41 | FormatType format_type) const; | 41 | FormatType format_type) const; |
| 42 | 42 | ||
| 43 | /// Returns true if a format is supported. | ||
| 44 | bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | ||
| 45 | FormatType format_type) const; | ||
| 46 | |||
| 43 | /// Reports a device loss. | 47 | /// Reports a device loss. |
| 44 | void ReportLoss() const; | 48 | void ReportLoss() const; |
| 45 | 49 | ||
| @@ -370,10 +374,6 @@ private: | |||
| 370 | /// Returns true if the device natively supports blitting depth stencil images. | 374 | /// Returns true if the device natively supports blitting depth stencil images. |
| 371 | bool TestDepthStencilBlits() const; | 375 | bool TestDepthStencilBlits() const; |
| 372 | 376 | ||
| 373 | /// Returns true if a format is supported. | ||
| 374 | bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | ||
| 375 | FormatType format_type) const; | ||
| 376 | |||
| 377 | VkInstance instance; ///< Vulkan instance. | 377 | VkInstance instance; ///< Vulkan instance. |
| 378 | vk::DeviceDispatch dld; ///< Device function pointers. | 378 | vk::DeviceDispatch dld; ///< Device function pointers. |
| 379 | vk::PhysicalDevice physical; ///< Physical device. | 379 | vk::PhysicalDevice physical; ///< Physical device. |
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 46ab0603d..976acd176 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -628,11 +628,9 @@ void GRenderWindow::ReleaseRenderTarget() { | |||
| 628 | main_context.reset(); | 628 | main_context.reset(); |
| 629 | } | 629 | } |
| 630 | 630 | ||
| 631 | void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { | 631 | void GRenderWindow::CaptureScreenshot(const QString& screenshot_path) { |
| 632 | VideoCore::RendererBase& renderer = system.Renderer(); | 632 | auto& renderer = system.Renderer(); |
| 633 | if (res_scale == 0) { | 633 | const f32 res_scale = VideoCore::GetResolutionScaleFactor(renderer); |
| 634 | res_scale = VideoCore::GetResolutionScaleFactor(renderer); | ||
| 635 | } | ||
| 636 | 634 | ||
| 637 | const Layout::FramebufferLayout layout{Layout::FrameLayoutFromResolutionScale(res_scale)}; | 635 | const Layout::FramebufferLayout layout{Layout::FrameLayoutFromResolutionScale(res_scale)}; |
| 638 | screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32); | 636 | screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32); |
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index e6a0666e9..40fd4a9d6 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h | |||
| @@ -178,7 +178,7 @@ public: | |||
| 178 | 178 | ||
| 179 | bool IsLoadingComplete() const; | 179 | bool IsLoadingComplete() const; |
| 180 | 180 | ||
| 181 | void CaptureScreenshot(u32 res_scale, const QString& screenshot_path); | 181 | void CaptureScreenshot(const QString& screenshot_path); |
| 182 | 182 | ||
| 183 | std::pair<u32, u32> ScaleTouch(const QPointF& pos) const; | 183 | std::pair<u32, u32> ScaleTouch(const QPointF& pos) const; |
| 184 | 184 | ||
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index faea5dda1..8227d06bc 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -824,6 +824,9 @@ void Config::ReadRendererValues() { | |||
| 824 | ReadGlobalSetting(Settings::values.vulkan_device); | 824 | ReadGlobalSetting(Settings::values.vulkan_device); |
| 825 | ReadGlobalSetting(Settings::values.fullscreen_mode); | 825 | ReadGlobalSetting(Settings::values.fullscreen_mode); |
| 826 | ReadGlobalSetting(Settings::values.aspect_ratio); | 826 | ReadGlobalSetting(Settings::values.aspect_ratio); |
| 827 | ReadGlobalSetting(Settings::values.resolution_setup); | ||
| 828 | ReadGlobalSetting(Settings::values.scaling_filter); | ||
| 829 | ReadGlobalSetting(Settings::values.anti_aliasing); | ||
| 827 | ReadGlobalSetting(Settings::values.max_anisotropy); | 830 | ReadGlobalSetting(Settings::values.max_anisotropy); |
| 828 | ReadGlobalSetting(Settings::values.use_speed_limit); | 831 | ReadGlobalSetting(Settings::values.use_speed_limit); |
| 829 | ReadGlobalSetting(Settings::values.speed_limit); | 832 | ReadGlobalSetting(Settings::values.speed_limit); |
| @@ -1364,6 +1367,18 @@ void Config::SaveRendererValues() { | |||
| 1364 | static_cast<u32>(Settings::values.fullscreen_mode.GetDefault()), | 1367 | static_cast<u32>(Settings::values.fullscreen_mode.GetDefault()), |
| 1365 | Settings::values.fullscreen_mode.UsingGlobal()); | 1368 | Settings::values.fullscreen_mode.UsingGlobal()); |
| 1366 | WriteGlobalSetting(Settings::values.aspect_ratio); | 1369 | WriteGlobalSetting(Settings::values.aspect_ratio); |
| 1370 | WriteSetting(QString::fromStdString(Settings::values.resolution_setup.GetLabel()), | ||
| 1371 | static_cast<u32>(Settings::values.resolution_setup.GetValue(global)), | ||
| 1372 | static_cast<u32>(Settings::values.resolution_setup.GetDefault()), | ||
| 1373 | Settings::values.resolution_setup.UsingGlobal()); | ||
| 1374 | WriteSetting(QString::fromStdString(Settings::values.scaling_filter.GetLabel()), | ||
| 1375 | static_cast<u32>(Settings::values.scaling_filter.GetValue(global)), | ||
| 1376 | static_cast<u32>(Settings::values.scaling_filter.GetDefault()), | ||
| 1377 | Settings::values.scaling_filter.UsingGlobal()); | ||
| 1378 | WriteSetting(QString::fromStdString(Settings::values.anti_aliasing.GetLabel()), | ||
| 1379 | static_cast<u32>(Settings::values.anti_aliasing.GetValue(global)), | ||
| 1380 | static_cast<u32>(Settings::values.anti_aliasing.GetDefault()), | ||
| 1381 | Settings::values.anti_aliasing.UsingGlobal()); | ||
| 1367 | WriteGlobalSetting(Settings::values.max_anisotropy); | 1382 | WriteGlobalSetting(Settings::values.max_anisotropy); |
| 1368 | WriteGlobalSetting(Settings::values.use_speed_limit); | 1383 | WriteGlobalSetting(Settings::values.use_speed_limit); |
| 1369 | WriteGlobalSetting(Settings::values.speed_limit); | 1384 | WriteGlobalSetting(Settings::values.speed_limit); |
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index a7f4a6720..d673c1cdc 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h | |||
| @@ -189,5 +189,8 @@ Q_DECLARE_METATYPE(Settings::CPUAccuracy); | |||
| 189 | Q_DECLARE_METATYPE(Settings::GPUAccuracy); | 189 | Q_DECLARE_METATYPE(Settings::GPUAccuracy); |
| 190 | Q_DECLARE_METATYPE(Settings::FullscreenMode); | 190 | Q_DECLARE_METATYPE(Settings::FullscreenMode); |
| 191 | Q_DECLARE_METATYPE(Settings::NvdecEmulation); | 191 | Q_DECLARE_METATYPE(Settings::NvdecEmulation); |
| 192 | Q_DECLARE_METATYPE(Settings::ResolutionSetup); | ||
| 193 | Q_DECLARE_METATYPE(Settings::ScalingFilter); | ||
| 194 | Q_DECLARE_METATYPE(Settings::AntiAliasing); | ||
| 192 | Q_DECLARE_METATYPE(Settings::RendererBackend); | 195 | Q_DECLARE_METATYPE(Settings::RendererBackend); |
| 193 | Q_DECLARE_METATYPE(Settings::ShaderBackend); | 196 | Q_DECLARE_METATYPE(Settings::ShaderBackend); |
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 8e20cc6f3..59f975a6e 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -89,6 +89,7 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 89 | ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); | 89 | ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); |
| 90 | ui->use_disk_shader_cache->setEnabled(runtime_lock); | 90 | ui->use_disk_shader_cache->setEnabled(runtime_lock); |
| 91 | ui->nvdec_emulation_widget->setEnabled(runtime_lock); | 91 | ui->nvdec_emulation_widget->setEnabled(runtime_lock); |
| 92 | ui->resolution_combobox->setEnabled(runtime_lock); | ||
| 92 | ui->accelerate_astc->setEnabled(runtime_lock); | 93 | ui->accelerate_astc->setEnabled(runtime_lock); |
| 93 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); | 94 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); |
| 94 | ui->use_asynchronous_gpu_emulation->setChecked( | 95 | ui->use_asynchronous_gpu_emulation->setChecked( |
| @@ -102,6 +103,12 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 102 | ui->nvdec_emulation->setCurrentIndex( | 103 | ui->nvdec_emulation->setCurrentIndex( |
| 103 | static_cast<int>(Settings::values.nvdec_emulation.GetValue())); | 104 | static_cast<int>(Settings::values.nvdec_emulation.GetValue())); |
| 104 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); | 105 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); |
| 106 | ui->resolution_combobox->setCurrentIndex( | ||
| 107 | static_cast<int>(Settings::values.resolution_setup.GetValue())); | ||
| 108 | ui->scaling_filter_combobox->setCurrentIndex( | ||
| 109 | static_cast<int>(Settings::values.scaling_filter.GetValue())); | ||
| 110 | ui->anti_aliasing_combobox->setCurrentIndex( | ||
| 111 | static_cast<int>(Settings::values.anti_aliasing.GetValue())); | ||
| 105 | } else { | 112 | } else { |
| 106 | ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); | 113 | ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); |
| 107 | ConfigurationShared::SetHighlight(ui->api_widget, | 114 | ConfigurationShared::SetHighlight(ui->api_widget, |
| @@ -122,6 +129,21 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 122 | ConfigurationShared::SetHighlight(ui->ar_label, | 129 | ConfigurationShared::SetHighlight(ui->ar_label, |
| 123 | !Settings::values.aspect_ratio.UsingGlobal()); | 130 | !Settings::values.aspect_ratio.UsingGlobal()); |
| 124 | 131 | ||
| 132 | ConfigurationShared::SetPerGameSetting(ui->resolution_combobox, | ||
| 133 | &Settings::values.resolution_setup); | ||
| 134 | ConfigurationShared::SetHighlight(ui->resolution_label, | ||
| 135 | !Settings::values.resolution_setup.UsingGlobal()); | ||
| 136 | |||
| 137 | ConfigurationShared::SetPerGameSetting(ui->scaling_filter_combobox, | ||
| 138 | &Settings::values.scaling_filter); | ||
| 139 | ConfigurationShared::SetHighlight(ui->scaling_filter_label, | ||
| 140 | !Settings::values.scaling_filter.UsingGlobal()); | ||
| 141 | |||
| 142 | ConfigurationShared::SetPerGameSetting(ui->anti_aliasing_combobox, | ||
| 143 | &Settings::values.anti_aliasing); | ||
| 144 | ConfigurationShared::SetHighlight(ui->anti_aliasing_label, | ||
| 145 | !Settings::values.anti_aliasing.UsingGlobal()); | ||
| 146 | |||
| 125 | ui->bg_combobox->setCurrentIndex(Settings::values.bg_red.UsingGlobal() ? 0 : 1); | 147 | ui->bg_combobox->setCurrentIndex(Settings::values.bg_red.UsingGlobal() ? 0 : 1); |
| 126 | ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); | 148 | ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); |
| 127 | ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); | 149 | ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); |
| @@ -133,11 +155,22 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 133 | } | 155 | } |
| 134 | 156 | ||
| 135 | void ConfigureGraphics::ApplyConfiguration() { | 157 | void ConfigureGraphics::ApplyConfiguration() { |
| 158 | const auto resolution_setup = static_cast<Settings::ResolutionSetup>( | ||
| 159 | ui->resolution_combobox->currentIndex() - | ||
| 160 | ((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET)); | ||
| 161 | |||
| 162 | const auto scaling_filter = static_cast<Settings::ScalingFilter>( | ||
| 163 | ui->scaling_filter_combobox->currentIndex() - | ||
| 164 | ((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET)); | ||
| 165 | |||
| 166 | const auto anti_aliasing = static_cast<Settings::AntiAliasing>( | ||
| 167 | ui->anti_aliasing_combobox->currentIndex() - | ||
| 168 | ((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET)); | ||
| 169 | |||
| 136 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.fullscreen_mode, | 170 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.fullscreen_mode, |
| 137 | ui->fullscreen_mode_combobox); | 171 | ui->fullscreen_mode_combobox); |
| 138 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.aspect_ratio, | 172 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.aspect_ratio, |
| 139 | ui->aspect_ratio_combobox); | 173 | ui->aspect_ratio_combobox); |
| 140 | |||
| 141 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_disk_shader_cache, | 174 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_disk_shader_cache, |
| 142 | ui->use_disk_shader_cache, use_disk_shader_cache); | 175 | ui->use_disk_shader_cache, use_disk_shader_cache); |
| 143 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, | 176 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, |
| @@ -165,7 +198,34 @@ void ConfigureGraphics::ApplyConfiguration() { | |||
| 165 | Settings::values.bg_green.SetValue(static_cast<u8>(bg_color.green())); | 198 | Settings::values.bg_green.SetValue(static_cast<u8>(bg_color.green())); |
| 166 | Settings::values.bg_blue.SetValue(static_cast<u8>(bg_color.blue())); | 199 | Settings::values.bg_blue.SetValue(static_cast<u8>(bg_color.blue())); |
| 167 | } | 200 | } |
| 201 | if (Settings::values.resolution_setup.UsingGlobal()) { | ||
| 202 | Settings::values.resolution_setup.SetValue(resolution_setup); | ||
| 203 | } | ||
| 204 | if (Settings::values.scaling_filter.UsingGlobal()) { | ||
| 205 | Settings::values.scaling_filter.SetValue(scaling_filter); | ||
| 206 | } | ||
| 207 | if (Settings::values.anti_aliasing.UsingGlobal()) { | ||
| 208 | Settings::values.anti_aliasing.SetValue(anti_aliasing); | ||
| 209 | } | ||
| 168 | } else { | 210 | } else { |
| 211 | if (ui->resolution_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { | ||
| 212 | Settings::values.resolution_setup.SetGlobal(true); | ||
| 213 | } else { | ||
| 214 | Settings::values.resolution_setup.SetGlobal(false); | ||
| 215 | Settings::values.resolution_setup.SetValue(resolution_setup); | ||
| 216 | } | ||
| 217 | if (ui->scaling_filter_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { | ||
| 218 | Settings::values.scaling_filter.SetGlobal(true); | ||
| 219 | } else { | ||
| 220 | Settings::values.scaling_filter.SetGlobal(false); | ||
| 221 | Settings::values.scaling_filter.SetValue(scaling_filter); | ||
| 222 | } | ||
| 223 | if (ui->anti_aliasing_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { | ||
| 224 | Settings::values.anti_aliasing.SetGlobal(true); | ||
| 225 | } else { | ||
| 226 | Settings::values.anti_aliasing.SetGlobal(false); | ||
| 227 | Settings::values.anti_aliasing.SetValue(anti_aliasing); | ||
| 228 | } | ||
| 169 | if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { | 229 | if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { |
| 170 | Settings::values.renderer_backend.SetGlobal(true); | 230 | Settings::values.renderer_backend.SetGlobal(true); |
| 171 | Settings::values.shader_backend.SetGlobal(true); | 231 | Settings::values.shader_backend.SetGlobal(true); |
| @@ -312,6 +372,9 @@ void ConfigureGraphics::SetupPerGameUI() { | |||
| 312 | ui->device->setEnabled(Settings::values.renderer_backend.UsingGlobal()); | 372 | ui->device->setEnabled(Settings::values.renderer_backend.UsingGlobal()); |
| 313 | ui->fullscreen_mode_combobox->setEnabled(Settings::values.fullscreen_mode.UsingGlobal()); | 373 | ui->fullscreen_mode_combobox->setEnabled(Settings::values.fullscreen_mode.UsingGlobal()); |
| 314 | ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); | 374 | ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); |
| 375 | ui->resolution_combobox->setEnabled(Settings::values.resolution_setup.UsingGlobal()); | ||
| 376 | ui->scaling_filter_combobox->setEnabled(Settings::values.scaling_filter.UsingGlobal()); | ||
| 377 | ui->anti_aliasing_combobox->setEnabled(Settings::values.anti_aliasing.UsingGlobal()); | ||
| 315 | ui->use_asynchronous_gpu_emulation->setEnabled( | 378 | ui->use_asynchronous_gpu_emulation->setEnabled( |
| 316 | Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); | 379 | Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); |
| 317 | ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal()); | 380 | ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal()); |
| @@ -340,6 +403,15 @@ void ConfigureGraphics::SetupPerGameUI() { | |||
| 340 | ConfigurationShared::SetColoredComboBox( | 403 | ConfigurationShared::SetColoredComboBox( |
| 341 | ui->fullscreen_mode_combobox, ui->fullscreen_mode_label, | 404 | ui->fullscreen_mode_combobox, ui->fullscreen_mode_label, |
| 342 | static_cast<int>(Settings::values.fullscreen_mode.GetValue(true))); | 405 | static_cast<int>(Settings::values.fullscreen_mode.GetValue(true))); |
| 406 | ConfigurationShared::SetColoredComboBox( | ||
| 407 | ui->resolution_combobox, ui->resolution_label, | ||
| 408 | static_cast<int>(Settings::values.resolution_setup.GetValue(true))); | ||
| 409 | ConfigurationShared::SetColoredComboBox( | ||
| 410 | ui->scaling_filter_combobox, ui->scaling_filter_label, | ||
| 411 | static_cast<int>(Settings::values.scaling_filter.GetValue(true))); | ||
| 412 | ConfigurationShared::SetColoredComboBox( | ||
| 413 | ui->anti_aliasing_combobox, ui->anti_aliasing_label, | ||
| 414 | static_cast<int>(Settings::values.anti_aliasing.GetValue(true))); | ||
| 343 | ConfigurationShared::InsertGlobalItem( | 415 | ConfigurationShared::InsertGlobalItem( |
| 344 | ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true))); | 416 | ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true))); |
| 345 | ConfigurationShared::InsertGlobalItem( | 417 | ConfigurationShared::InsertGlobalItem( |
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index beae74344..660b68c1c 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -310,6 +310,173 @@ | |||
| 310 | </widget> | 310 | </widget> |
| 311 | </item> | 311 | </item> |
| 312 | <item> | 312 | <item> |
| 313 | <widget class="QWidget" name="resolution_layout" native="true"> | ||
| 314 | <layout class="QHBoxLayout" name="horizontalLayout_5"> | ||
| 315 | <property name="leftMargin"> | ||
| 316 | <number>0</number> | ||
| 317 | </property> | ||
| 318 | <property name="topMargin"> | ||
| 319 | <number>0</number> | ||
| 320 | </property> | ||
| 321 | <property name="rightMargin"> | ||
| 322 | <number>0</number> | ||
| 323 | </property> | ||
| 324 | <property name="bottomMargin"> | ||
| 325 | <number>0</number> | ||
| 326 | </property> | ||
| 327 | <item> | ||
| 328 | <widget class="QLabel" name="resolution_label"> | ||
| 329 | <property name="text"> | ||
| 330 | <string>Resolution:</string> | ||
| 331 | </property> | ||
| 332 | </widget> | ||
| 333 | </item> | ||
| 334 | <item> | ||
| 335 | <widget class="QComboBox" name="resolution_combobox"> | ||
| 336 | <item> | ||
| 337 | <property name="text"> | ||
| 338 | <string>0.5X (360p/540p) [EXPERIMENTAL]</string> | ||
| 339 | </property> | ||
| 340 | </item> | ||
| 341 | <item> | ||
| 342 | <property name="text"> | ||
| 343 | <string>0.75X (540p/810p) [EXPERIMENTAL]</string> | ||
| 344 | </property> | ||
| 345 | </item> | ||
| 346 | <item> | ||
| 347 | <property name="text"> | ||
| 348 | <string>1X (720p/1080p)</string> | ||
| 349 | </property> | ||
| 350 | </item> | ||
| 351 | <item> | ||
| 352 | <property name="text"> | ||
| 353 | <string>2X (1440p/2160p)</string> | ||
| 354 | </property> | ||
| 355 | </item> | ||
| 356 | <item> | ||
| 357 | <property name="text"> | ||
| 358 | <string>3X (2160p/3240p)</string> | ||
| 359 | </property> | ||
| 360 | </item> | ||
| 361 | <item> | ||
| 362 | <property name="text"> | ||
| 363 | <string>4X (2880p/4320p)</string> | ||
| 364 | </property> | ||
| 365 | </item> | ||
| 366 | <item> | ||
| 367 | <property name="text"> | ||
| 368 | <string>5X (3600p/5400p)</string> | ||
| 369 | </property> | ||
| 370 | </item> | ||
| 371 | <item> | ||
| 372 | <property name="text"> | ||
| 373 | <string>6X (4320p/6480p)</string> | ||
| 374 | </property> | ||
| 375 | </item> | ||
| 376 | </widget> | ||
| 377 | </item> | ||
| 378 | </layout> | ||
| 379 | </widget> | ||
| 380 | </item> | ||
| 381 | <item> | ||
| 382 | <widget class="QWidget" name="scaling_filter_layout" native="true"> | ||
| 383 | <layout class="QHBoxLayout" name="horizontalLayout_6"> | ||
| 384 | <property name="leftMargin"> | ||
| 385 | <number>0</number> | ||
| 386 | </property> | ||
| 387 | <property name="topMargin"> | ||
| 388 | <number>0</number> | ||
| 389 | </property> | ||
| 390 | <property name="rightMargin"> | ||
| 391 | <number>0</number> | ||
| 392 | </property> | ||
| 393 | <property name="bottomMargin"> | ||
| 394 | <number>0</number> | ||
| 395 | </property> | ||
| 396 | <item> | ||
| 397 | <widget class="QLabel" name="scaling_filter_label"> | ||
| 398 | <property name="text"> | ||
| 399 | <string>Window Adapting Filter:</string> | ||
| 400 | </property> | ||
| 401 | </widget> | ||
| 402 | </item> | ||
| 403 | <item> | ||
| 404 | <widget class="QComboBox" name="scaling_filter_combobox"> | ||
| 405 | <item> | ||
| 406 | <property name="text"> | ||
| 407 | <string>Nearest Neighbor</string> | ||
| 408 | </property> | ||
| 409 | </item> | ||
| 410 | <item> | ||
| 411 | <property name="text"> | ||
| 412 | <string>Bilinear</string> | ||
| 413 | </property> | ||
| 414 | </item> | ||
| 415 | <item> | ||
| 416 | <property name="text"> | ||
| 417 | <string>Bicubic</string> | ||
| 418 | </property> | ||
| 419 | </item> | ||
| 420 | <item> | ||
| 421 | <property name="text"> | ||
| 422 | <string>Gaussian</string> | ||
| 423 | </property> | ||
| 424 | </item> | ||
| 425 | <item> | ||
| 426 | <property name="text"> | ||
| 427 | <string>ScaleForce</string> | ||
| 428 | </property> | ||
| 429 | </item> | ||
| 430 | <item> | ||
| 431 | <property name="text"> | ||
| 432 | <string>AMD's FidelityFX™️ Super Resolution [Vulkan Only]</string> | ||
| 433 | </property> | ||
| 434 | </item> | ||
| 435 | </widget> | ||
| 436 | </item> | ||
| 437 | </layout> | ||
| 438 | </widget> | ||
| 439 | </item> | ||
| 440 | <item> | ||
| 441 | <widget class="QWidget" name="anti_aliasing_layout" native="true"> | ||
| 442 | <layout class="QHBoxLayout" name="horizontalLayout_7"> | ||
| 443 | <property name="leftMargin"> | ||
| 444 | <number>0</number> | ||
| 445 | </property> | ||
| 446 | <property name="topMargin"> | ||
| 447 | <number>0</number> | ||
| 448 | </property> | ||
| 449 | <property name="rightMargin"> | ||
| 450 | <number>0</number> | ||
| 451 | </property> | ||
| 452 | <property name="bottomMargin"> | ||
| 453 | <number>0</number> | ||
| 454 | </property> | ||
| 455 | <item> | ||
| 456 | <widget class="QLabel" name="anti_aliasing_label"> | ||
| 457 | <property name="text"> | ||
| 458 | <string>Anti-Aliasing Method:</string> | ||
| 459 | </property> | ||
| 460 | </widget> | ||
| 461 | </item> | ||
| 462 | <item> | ||
| 463 | <widget class="QComboBox" name="anti_aliasing_combobox"> | ||
| 464 | <item> | ||
| 465 | <property name="text"> | ||
| 466 | <string>None</string> | ||
| 467 | </property> | ||
| 468 | </item> | ||
| 469 | <item> | ||
| 470 | <property name="text"> | ||
| 471 | <string>FXAA</string> | ||
| 472 | </property> | ||
| 473 | </item> | ||
| 474 | </widget> | ||
| 475 | </item> | ||
| 476 | </layout> | ||
| 477 | </widget> | ||
| 478 | </item> | ||
| 479 | <item> | ||
| 313 | <widget class="QWidget" name="bg_layout" native="true"> | 480 | <widget class="QWidget" name="bg_layout" native="true"> |
| 314 | <property name="sizePolicy"> | 481 | <property name="sizePolicy"> |
| 315 | <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> | 482 | <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index d06b45f17..96de0b3d1 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -125,27 +125,32 @@ | |||
| 125 | <widget class="QComboBox" name="anisotropic_filtering_combobox"> | 125 | <widget class="QComboBox" name="anisotropic_filtering_combobox"> |
| 126 | <item> | 126 | <item> |
| 127 | <property name="text"> | 127 | <property name="text"> |
| 128 | <string>Automatic</string> | ||
| 129 | </property> | ||
| 130 | </item> | ||
| 131 | <item> | ||
| 132 | <property name="text"> | ||
| 128 | <string>Default</string> | 133 | <string>Default</string> |
| 129 | </property> | 134 | </property> |
| 130 | </item> | 135 | </item> |
| 131 | <item> | 136 | <item> |
| 132 | <property name="text"> | 137 | <property name="text"> |
| 133 | <string>2x (WILL BREAK THINGS)</string> | 138 | <string>2x</string> |
| 134 | </property> | 139 | </property> |
| 135 | </item> | 140 | </item> |
| 136 | <item> | 141 | <item> |
| 137 | <property name="text"> | 142 | <property name="text"> |
| 138 | <string>4x (WILL BREAK THINGS)</string> | 143 | <string>4x</string> |
| 139 | </property> | 144 | </property> |
| 140 | </item> | 145 | </item> |
| 141 | <item> | 146 | <item> |
| 142 | <property name="text"> | 147 | <property name="text"> |
| 143 | <string>8x (WILL BREAK THINGS)</string> | 148 | <string>8x</string> |
| 144 | </property> | 149 | </property> |
| 145 | </item> | 150 | </item> |
| 146 | <item> | 151 | <item> |
| 147 | <property name="text"> | 152 | <property name="text"> |
| 148 | <string>16x (WILL BREAK THINGS)</string> | 153 | <string>16x</string> |
| 149 | </property> | 154 | </property> |
| 150 | </item> | 155 | </item> |
| 151 | </widget> | 156 | </widget> |
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp index 33110685a..a8b254199 100644 --- a/src/yuzu/debugger/profiler.cpp +++ b/src/yuzu/debugger/profiler.cpp | |||
| @@ -163,7 +163,7 @@ void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { | |||
| 163 | } | 163 | } |
| 164 | 164 | ||
| 165 | void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { | 165 | void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { |
| 166 | const auto wheel_position = ev->position().toPoint(); | 166 | const auto wheel_position = ev->pos(); |
| 167 | MicroProfileMousePosition(wheel_position.x() / x_scale, wheel_position.y() / y_scale, | 167 | MicroProfileMousePosition(wheel_position.x() / x_scale, wheel_position.y() / y_scale, |
| 168 | ev->angleDelta().y() / 120); | 168 | ev->angleDelta().y() / 120); |
| 169 | ev->accept(); | 169 | ev->accept(); |
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 6bd0f9ee9..2af95dbe5 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp | |||
| @@ -159,7 +159,7 @@ GameListSearchField::GameListSearchField(GameList* parent) : QWidget{parent} { | |||
| 159 | * @return true if the haystack contains all words of userinput | 159 | * @return true if the haystack contains all words of userinput |
| 160 | */ | 160 | */ |
| 161 | static bool ContainsAllWords(const QString& haystack, const QString& userinput) { | 161 | static bool ContainsAllWords(const QString& haystack, const QString& userinput) { |
| 162 | const QStringList userinput_split = userinput.split(QLatin1Char{' '}, Qt::SkipEmptyParts); | 162 | const QStringList userinput_split = userinput.split(QLatin1Char{' '}, QString::SkipEmptyParts); |
| 163 | 163 | ||
| 164 | return std::all_of(userinput_split.begin(), userinput_split.end(), | 164 | return std::all_of(userinput_split.begin(), userinput_split.end(), |
| 165 | [&haystack](const QString& s) { return haystack.contains(s); }); | 165 | [&haystack](const QString& s) { return haystack.contains(s); }); |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 4e5552d2a..d057dc889 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -747,6 +747,8 @@ void GMainWindow::InitializeWidgets() { | |||
| 747 | 747 | ||
| 748 | shader_building_label = new QLabel(); | 748 | shader_building_label = new QLabel(); |
| 749 | shader_building_label->setToolTip(tr("The amount of shaders currently being built")); | 749 | shader_building_label->setToolTip(tr("The amount of shaders currently being built")); |
| 750 | res_scale_label = new QLabel(); | ||
| 751 | res_scale_label->setToolTip(tr("The current selected resolution scaling multiplier.")); | ||
| 750 | emu_speed_label = new QLabel(); | 752 | emu_speed_label = new QLabel(); |
| 751 | emu_speed_label->setToolTip( | 753 | emu_speed_label->setToolTip( |
| 752 | tr("Current emulation speed. Values higher or lower than 100% " | 754 | tr("Current emulation speed. Values higher or lower than 100% " |
| @@ -759,8 +761,8 @@ void GMainWindow::InitializeWidgets() { | |||
| 759 | tr("Time taken to emulate a Switch frame, not counting framelimiting or v-sync. For " | 761 | tr("Time taken to emulate a Switch frame, not counting framelimiting or v-sync. For " |
| 760 | "full-speed emulation this should be at most 16.67 ms.")); | 762 | "full-speed emulation this should be at most 16.67 ms.")); |
| 761 | 763 | ||
| 762 | for (auto& label : | 764 | for (auto& label : {shader_building_label, res_scale_label, emu_speed_label, game_fps_label, |
| 763 | {shader_building_label, emu_speed_label, game_fps_label, emu_frametime_label}) { | 765 | emu_frametime_label}) { |
| 764 | label->setVisible(false); | 766 | label->setVisible(false); |
| 765 | label->setFrameStyle(QFrame::NoFrame); | 767 | label->setFrameStyle(QFrame::NoFrame); |
| 766 | label->setContentsMargins(4, 0, 4, 0); | 768 | label->setContentsMargins(4, 0, 4, 0); |
| @@ -772,6 +774,55 @@ void GMainWindow::InitializeWidgets() { | |||
| 772 | tas_label->setFocusPolicy(Qt::NoFocus); | 774 | tas_label->setFocusPolicy(Qt::NoFocus); |
| 773 | statusBar()->insertPermanentWidget(0, tas_label); | 775 | statusBar()->insertPermanentWidget(0, tas_label); |
| 774 | 776 | ||
| 777 | // Setup AA button | ||
| 778 | aa_status_button = new QPushButton(); | ||
| 779 | aa_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); | ||
| 780 | aa_status_button->setFocusPolicy(Qt::NoFocus); | ||
| 781 | connect(aa_status_button, &QPushButton::clicked, [&] { | ||
| 782 | auto aa_mode = Settings::values.anti_aliasing.GetValue(); | ||
| 783 | if (aa_mode == Settings::AntiAliasing::LastAA) { | ||
| 784 | aa_mode = Settings::AntiAliasing::None; | ||
| 785 | } else { | ||
| 786 | aa_mode = static_cast<Settings::AntiAliasing>(static_cast<u32>(aa_mode) + 1); | ||
| 787 | } | ||
| 788 | Settings::values.anti_aliasing.SetValue(aa_mode); | ||
| 789 | aa_status_button->setChecked(true); | ||
| 790 | UpdateAAText(); | ||
| 791 | }); | ||
| 792 | UpdateAAText(); | ||
| 793 | aa_status_button->setCheckable(true); | ||
| 794 | aa_status_button->setChecked(true); | ||
| 795 | statusBar()->insertPermanentWidget(0, aa_status_button); | ||
| 796 | |||
| 797 | // Setup Filter button | ||
| 798 | filter_status_button = new QPushButton(); | ||
| 799 | filter_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); | ||
| 800 | filter_status_button->setFocusPolicy(Qt::NoFocus); | ||
| 801 | connect(filter_status_button, &QPushButton::clicked, [&] { | ||
| 802 | auto filter = Settings::values.scaling_filter.GetValue(); | ||
| 803 | if (filter == Settings::ScalingFilter::LastFilter) { | ||
| 804 | filter = Settings::ScalingFilter::NearestNeighbor; | ||
| 805 | } else { | ||
| 806 | filter = static_cast<Settings::ScalingFilter>(static_cast<u32>(filter) + 1); | ||
| 807 | } | ||
| 808 | if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL && | ||
| 809 | filter == Settings::ScalingFilter::Fsr) { | ||
| 810 | filter = Settings::ScalingFilter::NearestNeighbor; | ||
| 811 | } | ||
| 812 | Settings::values.scaling_filter.SetValue(filter); | ||
| 813 | filter_status_button->setChecked(true); | ||
| 814 | UpdateFilterText(); | ||
| 815 | }); | ||
| 816 | auto filter = Settings::values.scaling_filter.GetValue(); | ||
| 817 | if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL && | ||
| 818 | filter == Settings::ScalingFilter::Fsr) { | ||
| 819 | Settings::values.scaling_filter.SetValue(Settings::ScalingFilter::NearestNeighbor); | ||
| 820 | } | ||
| 821 | UpdateFilterText(); | ||
| 822 | filter_status_button->setCheckable(true); | ||
| 823 | filter_status_button->setChecked(true); | ||
| 824 | statusBar()->insertPermanentWidget(0, filter_status_button); | ||
| 825 | |||
| 775 | // Setup Dock button | 826 | // Setup Dock button |
| 776 | dock_status_button = new QPushButton(); | 827 | dock_status_button = new QPushButton(); |
| 777 | dock_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); | 828 | dock_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); |
| @@ -842,6 +893,11 @@ void GMainWindow::InitializeWidgets() { | |||
| 842 | Settings::values.renderer_backend.SetValue(Settings::RendererBackend::Vulkan); | 893 | Settings::values.renderer_backend.SetValue(Settings::RendererBackend::Vulkan); |
| 843 | } else { | 894 | } else { |
| 844 | Settings::values.renderer_backend.SetValue(Settings::RendererBackend::OpenGL); | 895 | Settings::values.renderer_backend.SetValue(Settings::RendererBackend::OpenGL); |
| 896 | const auto filter = Settings::values.scaling_filter.GetValue(); | ||
| 897 | if (filter == Settings::ScalingFilter::Fsr) { | ||
| 898 | Settings::values.scaling_filter.SetValue(Settings::ScalingFilter::NearestNeighbor); | ||
| 899 | UpdateFilterText(); | ||
| 900 | } | ||
| 845 | } | 901 | } |
| 846 | 902 | ||
| 847 | system->ApplySettings(); | 903 | system->ApplySettings(); |
| @@ -1535,6 +1591,7 @@ void GMainWindow::ShutdownGame() { | |||
| 1535 | // Disable status bar updates | 1591 | // Disable status bar updates |
| 1536 | status_bar_update_timer.stop(); | 1592 | status_bar_update_timer.stop(); |
| 1537 | shader_building_label->setVisible(false); | 1593 | shader_building_label->setVisible(false); |
| 1594 | res_scale_label->setVisible(false); | ||
| 1538 | emu_speed_label->setVisible(false); | 1595 | emu_speed_label->setVisible(false); |
| 1539 | game_fps_label->setVisible(false); | 1596 | game_fps_label->setVisible(false); |
| 1540 | emu_frametime_label->setVisible(false); | 1597 | emu_frametime_label->setVisible(false); |
| @@ -2889,8 +2946,7 @@ void GMainWindow::OnCaptureScreenshot() { | |||
| 2889 | } | 2946 | } |
| 2890 | } | 2947 | } |
| 2891 | #endif | 2948 | #endif |
| 2892 | render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor.GetValue(), | 2949 | render_window->CaptureScreenshot(filename); |
| 2893 | filename); | ||
| 2894 | } | 2950 | } |
| 2895 | 2951 | ||
| 2896 | // TODO: Written 2020-10-01: Remove per-game config migration code when it is irrelevant | 2952 | // TODO: Written 2020-10-01: Remove per-game config migration code when it is irrelevant |
| @@ -2981,6 +3037,11 @@ void GMainWindow::UpdateStatusBar() { | |||
| 2981 | shader_building_label->setVisible(false); | 3037 | shader_building_label->setVisible(false); |
| 2982 | } | 3038 | } |
| 2983 | 3039 | ||
| 3040 | const auto res_info = Settings::values.resolution_info; | ||
| 3041 | const auto res_scale = res_info.up_factor; | ||
| 3042 | res_scale_label->setText( | ||
| 3043 | tr("Scale: %1x", "%1 is the resolution scaling factor").arg(res_scale)); | ||
| 3044 | |||
| 2984 | if (Settings::values.use_speed_limit.GetValue()) { | 3045 | if (Settings::values.use_speed_limit.GetValue()) { |
| 2985 | emu_speed_label->setText(tr("Speed: %1% / %2%") | 3046 | emu_speed_label->setText(tr("Speed: %1% / %2%") |
| 2986 | .arg(results.emulation_speed * 100.0, 0, 'f', 0) | 3047 | .arg(results.emulation_speed * 100.0, 0, 'f', 0) |
| @@ -2996,6 +3057,7 @@ void GMainWindow::UpdateStatusBar() { | |||
| 2996 | } | 3057 | } |
| 2997 | emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2)); | 3058 | emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2)); |
| 2998 | 3059 | ||
| 3060 | res_scale_label->setVisible(true); | ||
| 2999 | emu_speed_label->setVisible(!Settings::values.use_multi_core.GetValue()); | 3061 | emu_speed_label->setVisible(!Settings::values.use_multi_core.GetValue()); |
| 3000 | game_fps_label->setVisible(true); | 3062 | game_fps_label->setVisible(true); |
| 3001 | emu_frametime_label->setVisible(true); | 3063 | emu_frametime_label->setVisible(true); |
| @@ -3025,11 +3087,55 @@ void GMainWindow::UpdateGPUAccuracyButton() { | |||
| 3025 | } | 3087 | } |
| 3026 | } | 3088 | } |
| 3027 | 3089 | ||
| 3090 | void GMainWindow::UpdateFilterText() { | ||
| 3091 | const auto filter = Settings::values.scaling_filter.GetValue(); | ||
| 3092 | switch (filter) { | ||
| 3093 | case Settings::ScalingFilter::NearestNeighbor: | ||
| 3094 | filter_status_button->setText(tr("NEAREST")); | ||
| 3095 | break; | ||
| 3096 | case Settings::ScalingFilter::Bilinear: | ||
| 3097 | filter_status_button->setText(tr("BILINEAR")); | ||
| 3098 | break; | ||
| 3099 | case Settings::ScalingFilter::Bicubic: | ||
| 3100 | filter_status_button->setText(tr("BICUBIC")); | ||
| 3101 | break; | ||
| 3102 | case Settings::ScalingFilter::Gaussian: | ||
| 3103 | filter_status_button->setText(tr("GAUSSIAN")); | ||
| 3104 | break; | ||
| 3105 | case Settings::ScalingFilter::ScaleForce: | ||
| 3106 | filter_status_button->setText(tr("SCALEFORCE")); | ||
| 3107 | break; | ||
| 3108 | case Settings::ScalingFilter::Fsr: | ||
| 3109 | filter_status_button->setText(tr("AMD'S FIDELITYFX SR")); | ||
| 3110 | break; | ||
| 3111 | default: | ||
| 3112 | filter_status_button->setText(tr("BILINEAR")); | ||
| 3113 | break; | ||
| 3114 | } | ||
| 3115 | } | ||
| 3116 | |||
| 3117 | void GMainWindow::UpdateAAText() { | ||
| 3118 | const auto aa_mode = Settings::values.anti_aliasing.GetValue(); | ||
| 3119 | switch (aa_mode) { | ||
| 3120 | case Settings::AntiAliasing::Fxaa: | ||
| 3121 | aa_status_button->setText(tr("FXAA")); | ||
| 3122 | break; | ||
| 3123 | case Settings::AntiAliasing::None: | ||
| 3124 | aa_status_button->setText(tr("NO AA")); | ||
| 3125 | break; | ||
| 3126 | default: | ||
| 3127 | aa_status_button->setText(tr("FXAA")); | ||
| 3128 | break; | ||
| 3129 | } | ||
| 3130 | } | ||
| 3131 | |||
| 3028 | void GMainWindow::UpdateStatusButtons() { | 3132 | void GMainWindow::UpdateStatusButtons() { |
| 3029 | dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); | 3133 | dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); |
| 3030 | renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == | 3134 | renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == |
| 3031 | Settings::RendererBackend::Vulkan); | 3135 | Settings::RendererBackend::Vulkan); |
| 3032 | UpdateGPUAccuracyButton(); | 3136 | UpdateGPUAccuracyButton(); |
| 3137 | UpdateFilterText(); | ||
| 3138 | UpdateAAText(); | ||
| 3033 | } | 3139 | } |
| 3034 | 3140 | ||
| 3035 | void GMainWindow::UpdateUISettings() { | 3141 | void GMainWindow::UpdateUISettings() { |
diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 981102daa..24633ff2d 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h | |||
| @@ -302,6 +302,8 @@ private: | |||
| 302 | void MigrateConfigFiles(); | 302 | void MigrateConfigFiles(); |
| 303 | void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {}, | 303 | void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {}, |
| 304 | std::string_view gpu_vendor = {}); | 304 | std::string_view gpu_vendor = {}); |
| 305 | void UpdateFilterText(); | ||
| 306 | void UpdateAAText(); | ||
| 305 | void UpdateStatusBar(); | 307 | void UpdateStatusBar(); |
| 306 | void UpdateGPUAccuracyButton(); | 308 | void UpdateGPUAccuracyButton(); |
| 307 | void UpdateStatusButtons(); | 309 | void UpdateStatusButtons(); |
| @@ -328,6 +330,7 @@ private: | |||
| 328 | // Status bar elements | 330 | // Status bar elements |
| 329 | QLabel* message_label = nullptr; | 331 | QLabel* message_label = nullptr; |
| 330 | QLabel* shader_building_label = nullptr; | 332 | QLabel* shader_building_label = nullptr; |
| 333 | QLabel* res_scale_label = nullptr; | ||
| 331 | QLabel* emu_speed_label = nullptr; | 334 | QLabel* emu_speed_label = nullptr; |
| 332 | QLabel* game_fps_label = nullptr; | 335 | QLabel* game_fps_label = nullptr; |
| 333 | QLabel* emu_frametime_label = nullptr; | 336 | QLabel* emu_frametime_label = nullptr; |
| @@ -335,6 +338,8 @@ private: | |||
| 335 | QPushButton* gpu_accuracy_button = nullptr; | 338 | QPushButton* gpu_accuracy_button = nullptr; |
| 336 | QPushButton* renderer_status_button = nullptr; | 339 | QPushButton* renderer_status_button = nullptr; |
| 337 | QPushButton* dock_status_button = nullptr; | 340 | QPushButton* dock_status_button = nullptr; |
| 341 | QPushButton* filter_status_button = nullptr; | ||
| 342 | QPushButton* aa_status_button = nullptr; | ||
| 338 | QTimer status_bar_update_timer; | 343 | QTimer status_bar_update_timer; |
| 339 | 344 | ||
| 340 | std::unique_ptr<Config> config; | 345 | std::unique_ptr<Config> config; |
diff --git a/src/yuzu/uisettings.h b/src/yuzu/uisettings.h index cac19452f..936914ef3 100644 --- a/src/yuzu/uisettings.h +++ b/src/yuzu/uisettings.h | |||
| @@ -68,7 +68,6 @@ struct Values { | |||
| 68 | Settings::BasicSetting<bool> enable_discord_presence{true, "enable_discord_presence"}; | 68 | Settings::BasicSetting<bool> enable_discord_presence{true, "enable_discord_presence"}; |
| 69 | 69 | ||
| 70 | Settings::BasicSetting<bool> enable_screenshot_save_as{true, "enable_screenshot_save_as"}; | 70 | Settings::BasicSetting<bool> enable_screenshot_save_as{true, "enable_screenshot_save_as"}; |
| 71 | Settings::BasicSetting<u16> screenshot_resolution_factor{0, "screenshot_resolution_factor"}; | ||
| 72 | 71 | ||
| 73 | QString roms_path; | 72 | QString roms_path; |
| 74 | QString symbols_path; | 73 | QString symbols_path; |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 0b8fde691..33241ea98 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -451,6 +451,9 @@ void Config::ReadValues() { | |||
| 451 | ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); | 451 | ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); |
| 452 | ReadSetting("Renderer", Settings::values.vulkan_device); | 452 | ReadSetting("Renderer", Settings::values.vulkan_device); |
| 453 | 453 | ||
| 454 | ReadSetting("Renderer", Settings::values.resolution_setup); | ||
| 455 | ReadSetting("Renderer", Settings::values.scaling_filter); | ||
| 456 | ReadSetting("Renderer", Settings::values.anti_aliasing); | ||
| 454 | ReadSetting("Renderer", Settings::values.fullscreen_mode); | 457 | ReadSetting("Renderer", Settings::values.fullscreen_mode); |
| 455 | ReadSetting("Renderer", Settings::values.aspect_ratio); | 458 | ReadSetting("Renderer", Settings::values.aspect_ratio); |
| 456 | ReadSetting("Renderer", Settings::values.max_anisotropy); | 459 | ReadSetting("Renderer", Settings::values.max_anisotropy); |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 339dca766..ecdc271a8 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -236,6 +236,29 @@ disable_shader_loop_safety_checks = | |||
| 236 | # Which Vulkan physical device to use (defaults to 0) | 236 | # Which Vulkan physical device to use (defaults to 0) |
| 237 | vulkan_device = | 237 | vulkan_device = |
| 238 | 238 | ||
| 239 | # 0: 0.5x (360p/540p) [EXPERIMENTAL] | ||
| 240 | # 1: 0.75x (540p/810p) [EXPERIMENTAL] | ||
| 241 | # 2 (default): 1x (720p/1080p) | ||
| 242 | # 3: 2x (1440p/2160p) | ||
| 243 | # 4: 3x (2160p/3240p) | ||
| 244 | # 5: 4x (2880p/4320p) | ||
| 245 | # 6: 5x (3600p/5400p) | ||
| 246 | # 7: 6x (4320p/6480p) | ||
| 247 | resolution_setup = | ||
| 248 | |||
| 249 | # Pixel filter to use when up- or down-sampling rendered frames. | ||
| 250 | # 0: Nearest Neighbor | ||
| 251 | # 1 (default): Bilinear | ||
| 252 | # 2: Bicubic | ||
| 253 | # 3: Gaussian | ||
| 254 | # 4: ScaleForce | ||
| 255 | # 5: AMD FidelityFX™️ Super Resolution [Vulkan Only] | ||
| 256 | scaling_filter = | ||
| 257 | |||
| 258 | # Anti-Aliasing (AA) | ||
| 259 | # 0 (default): None, 1: FXAA | ||
| 260 | anti_aliasing = | ||
| 261 | |||
| 239 | # Whether to use fullscreen or borderless window mode | 262 | # Whether to use fullscreen or borderless window mode |
| 240 | # 0 (Windows default): Borderless window, 1 (All other default): Exclusive fullscreen | 263 | # 0 (Windows default): Borderless window, 1 (All other default): Exclusive fullscreen |
| 241 | fullscreen_mode = | 264 | fullscreen_mode = |