diff options
| author | 2021-05-25 02:01:32 -0400 | |
|---|---|---|
| committer | 2021-07-22 21:51:36 -0400 | |
| commit | 3482df1176203b4999353e8266f42032536b561c (patch) | |
| tree | f3cf53e6d1c8d3c6bbe49ec90dd10c870ecf1169 /src/shader_recompiler/backend/glsl | |
| parent | glsl: F16x2 storage atomics (diff) | |
| download | yuzu-3482df1176203b4999353e8266f42032536b561c.tar.gz yuzu-3482df1176203b4999353e8266f42032536b561c.tar.xz yuzu-3482df1176203b4999353e8266f42032536b561c.zip | |
glsl: Simplify FP storage atomics
Diffstat (limited to 'src/shader_recompiler/backend/glsl')
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_context.cpp | 28 | ||||
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp | 48 |
2 files changed, 28 insertions, 48 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index a413219e3..9c3fd44ba 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp | |||
| @@ -78,32 +78,32 @@ void EmitContext::DefineHelperFunctions() { | |||
| 78 | "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; | 78 | "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; |
| 79 | } | 79 | } |
| 80 | if (info.uses_atomic_f32_add) { | 80 | if (info.uses_atomic_f32_add) { |
| 81 | code += "uint CasFloatAdd(uint op_a,uint op_b){return " | 81 | code += "uint CasFloatAdd(uint op_a,float op_b){return " |
| 82 | "floatBitsToUint(uintBitsToFloat(op_a)+uintBitsToFloat(op_b));}\n"; | 82 | "floatBitsToUint(uintBitsToFloat(op_a)+op_b);}\n"; |
| 83 | } | 83 | } |
| 84 | if (info.uses_atomic_f32x2_add) { | 84 | if (info.uses_atomic_f32x2_add) { |
| 85 | code += "uint CasFloatAdd32x2(uint op_a,uint op_b){return " | 85 | code += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return " |
| 86 | "packHalf2x16(unpackHalf2x16(op_a)+unpackHalf2x16(op_b));}\n"; | 86 | "packHalf2x16(unpackHalf2x16(op_a)+op_b);}\n"; |
| 87 | } | 87 | } |
| 88 | if (info.uses_atomic_f32x2_min) { | 88 | if (info.uses_atomic_f32x2_min) { |
| 89 | code += "uint CasFloatMin32x2(uint op_a,uint op_b){return " | 89 | code += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " |
| 90 | "packHalf2x16(min(unpackHalf2x16(op_a),unpackHalf2x16(op_b)));}\n"; | 90 | "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}\n"; |
| 91 | } | 91 | } |
| 92 | if (info.uses_atomic_f32x2_max) { | 92 | if (info.uses_atomic_f32x2_max) { |
| 93 | code += "uint CasFloatMax32x2(uint op_a,uint op_b){return " | 93 | code += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " |
| 94 | "packHalf2x16(max(unpackHalf2x16(op_a),unpackHalf2x16(op_b)));}\n"; | 94 | "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}\n"; |
| 95 | } | 95 | } |
| 96 | if (info.uses_atomic_f16x2_add) { | 96 | if (info.uses_atomic_f16x2_add) { |
| 97 | code += "uint CasFloatAdd16x2(uint op_a,uint op_b){return " | 97 | code += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " |
| 98 | "packFloat2x16(unpackFloat2x16(op_a)+unpackFloat2x16(op_b));}\n"; | 98 | "packFloat2x16(unpackFloat2x16(op_a)+op_b);}\n"; |
| 99 | } | 99 | } |
| 100 | if (info.uses_atomic_f16x2_min) { | 100 | if (info.uses_atomic_f16x2_min) { |
| 101 | code += "uint CasFloatMin16x2(uint op_a,uint op_b){return " | 101 | code += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " |
| 102 | "packFloat2x16(min(unpackFloat2x16(op_a),unpackFloat2x16(op_b)));}\n"; | 102 | "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}\n"; |
| 103 | } | 103 | } |
| 104 | if (info.uses_atomic_f16x2_max) { | 104 | if (info.uses_atomic_f16x2_max) { |
| 105 | code += "uint CasFloatMax16x2(uint op_a,uint op_b){return " | 105 | code += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " |
| 106 | "packFloat2x16(max(unpackFloat2x16(op_a),unpackFloat2x16(op_b)));}\n"; | 106 | "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}\n"; |
| 107 | } | 107 | } |
| 108 | // TODO: Track this usage | 108 | // TODO: Track this usage |
| 109 | code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; | 109 | code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index b6b326762..0c3af75f5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp | |||
| @@ -19,16 +19,11 @@ for (;;){{ | |||
| 19 | if ({}==old_value){{break;}} | 19 | if ({}==old_value){{break;}} |
| 20 | }})"}; | 20 | }})"}; |
| 21 | 21 | ||
| 22 | void CasFunction(EmitContext& ctx, std::string_view ret, std::string_view ssbo, | 22 | void CasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 23 | std::string_view value, std::string_view function) { | 23 | const IR::Value& offset, std::string_view value, std::string_view function) { |
| 24 | ctx.Add(cas_loop.data(), ret, ssbo, ret, ssbo, function, ssbo, value, ret); | ||
| 25 | } | ||
| 26 | |||
| 27 | void CasFunctionInt32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 28 | const IR::Value& offset, std::string_view value, std::string_view function) { | ||
| 29 | const auto ret{ctx.reg_alloc.Define(inst)}; | 24 | const auto ret{ctx.reg_alloc.Define(inst)}; |
| 30 | const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; | 25 | const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; |
| 31 | CasFunction(ctx, ret, ssbo, value, function); | 26 | ctx.Add(cas_loop.data(), ret, ssbo, ret, ssbo, function, ssbo, value, ret); |
| 32 | } | 27 | } |
| 33 | 28 | ||
| 34 | void CasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 29 | void CasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| @@ -37,25 +32,10 @@ void CasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | |||
| 37 | const std::string u32_value{fmt::format("floatBitsToUint({})", value)}; | 32 | const std::string u32_value{fmt::format("floatBitsToUint({})", value)}; |
| 38 | const auto ret{ctx.reg_alloc.Define(inst)}; | 33 | const auto ret{ctx.reg_alloc.Define(inst)}; |
| 39 | const auto ret_32{ret + "_u32"}; | 34 | const auto ret_32{ret + "_u32"}; |
| 40 | CasFunction(ctx, ret_32, ssbo, u32_value, function); | 35 | ctx.Add(cas_loop.data(), ret_32, ssbo, ret_32, ssbo, function, ssbo, value, ret_32); |
| 41 | ctx.Add("float {}=uintBitsToFloat({});", ret, ret_32); | 36 | ctx.Add("float {}=uintBitsToFloat({});", ret, ret_32); |
| 42 | } | 37 | } |
| 43 | 38 | ||
| 44 | void CasFunctionF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 45 | const IR::Value& offset, std::string_view value, std::string_view function) { | ||
| 46 | const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; | ||
| 47 | const std::string u32_value{fmt::format("packHalf2x16({})", value)}; | ||
| 48 | const auto ret{ctx.reg_alloc.Define(inst)}; | ||
| 49 | CasFunction(ctx, ret, ssbo, u32_value, function); | ||
| 50 | } | ||
| 51 | |||
| 52 | void CasFunctionF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 53 | const IR::Value& offset, std::string_view value, std::string_view function) { | ||
| 54 | const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; | ||
| 55 | const std::string u32_value{fmt::format("packFloat2x16({})", value)}; | ||
| 56 | const auto ret{ctx.reg_alloc.Define(inst)}; | ||
| 57 | CasFunction(ctx, ret, ssbo, u32_value, function); | ||
| 58 | } | ||
| 59 | } // namespace | 39 | } // namespace |
| 60 | 40 | ||
| 61 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 41 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| @@ -66,7 +46,7 @@ void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& | |||
| 66 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 46 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 67 | const IR::Value& offset, std::string_view value) { | 47 | const IR::Value& offset, std::string_view value) { |
| 68 | const std::string u32_value{fmt::format("uint({})", value)}; | 48 | const std::string u32_value{fmt::format("uint({})", value)}; |
| 69 | CasFunctionInt32(ctx, inst, binding, offset, u32_value, "CasMinS32"); | 49 | CasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32"); |
| 70 | } | 50 | } |
| 71 | 51 | ||
| 72 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 52 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| @@ -77,7 +57,7 @@ void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& | |||
| 77 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 57 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 78 | const IR::Value& offset, std::string_view value) { | 58 | const IR::Value& offset, std::string_view value) { |
| 79 | const std::string u32_value{fmt::format("uint({})", value)}; | 59 | const std::string u32_value{fmt::format("uint({})", value)}; |
| 80 | CasFunctionInt32(ctx, inst, binding, offset, u32_value, "CasMaxS32"); | 60 | CasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32"); |
| 81 | } | 61 | } |
| 82 | 62 | ||
| 83 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 63 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| @@ -87,12 +67,12 @@ void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& | |||
| 87 | 67 | ||
| 88 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 68 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 89 | const IR::Value& offset, std::string_view value) { | 69 | const IR::Value& offset, std::string_view value) { |
| 90 | CasFunctionInt32(ctx, inst, binding, offset, value, "CasIncrement"); | 70 | CasFunction(ctx, inst, binding, offset, value, "CasIncrement"); |
| 91 | } | 71 | } |
| 92 | 72 | ||
| 93 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 73 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 94 | const IR::Value& offset, std::string_view value) { | 74 | const IR::Value& offset, std::string_view value) { |
| 95 | CasFunctionInt32(ctx, inst, binding, offset, value, "CasDecrement"); | 75 | CasFunction(ctx, inst, binding, offset, value, "CasDecrement"); |
| 96 | } | 76 | } |
| 97 | 77 | ||
| 98 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 78 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| @@ -199,32 +179,32 @@ void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& | |||
| 199 | 179 | ||
| 200 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 180 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 201 | const IR::Value& offset, std::string_view value) { | 181 | const IR::Value& offset, std::string_view value) { |
| 202 | CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); | 182 | CasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); |
| 203 | } | 183 | } |
| 204 | 184 | ||
| 205 | void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 185 | void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 206 | const IR::Value& offset, std::string_view value) { | 186 | const IR::Value& offset, std::string_view value) { |
| 207 | CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); | 187 | CasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); |
| 208 | } | 188 | } |
| 209 | 189 | ||
| 210 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 190 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 211 | const IR::Value& offset, std::string_view value) { | 191 | const IR::Value& offset, std::string_view value) { |
| 212 | CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatMin16x2"); | 192 | CasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2"); |
| 213 | } | 193 | } |
| 214 | 194 | ||
| 215 | void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 195 | void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 216 | const IR::Value& offset, std::string_view value) { | 196 | const IR::Value& offset, std::string_view value) { |
| 217 | CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMin32x2"); | 197 | CasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2"); |
| 218 | } | 198 | } |
| 219 | 199 | ||
| 220 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 200 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 221 | const IR::Value& offset, std::string_view value) { | 201 | const IR::Value& offset, std::string_view value) { |
| 222 | CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatMax16x2"); | 202 | CasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2"); |
| 223 | } | 203 | } |
| 224 | 204 | ||
| 225 | void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 205 | void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 226 | const IR::Value& offset, std::string_view value) { | 206 | const IR::Value& offset, std::string_view value) { |
| 227 | CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMax32x2"); | 207 | CasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2"); |
| 228 | } | 208 | } |
| 229 | 209 | ||
| 230 | void EmitGlobalAtomicIAdd32(EmitContext&) { | 210 | void EmitGlobalAtomicIAdd32(EmitContext&) { |