Diffstat (limited to 'src/shader_recompiler/backend')
 5 files changed, 64 insertions(+), 63 deletions(-)
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
index d3301054c..9714ffe33 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
@@ -20,7 +20,7 @@ static constexpr std::string_view cas_loop{R"(for (;;){{
 void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
                        std::string_view value, std::string_view function) {
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
-    const std::string smem{fmt::format("smem[{}/4]", offset)};
+    const std::string smem{fmt::format("smem[{}>>2]", offset)};
     ctx.Add(cas_loop.data(), smem, ret, smem, function, smem, value, ret);
 }
 
@@ -45,7 +45,7 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi
 
 void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                             std::string_view value) {
-    ctx.AddU32("{}=atomicAdd(smem[{}/4],{});", inst, pointer_offset, value);
+    ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value);
 }
 
 void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
@@ -56,7 +56,7 @@ void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view p
 
 void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                             std::string_view value) {
-    ctx.AddU32("{}=atomicMin(smem[{}/4],{});", inst, pointer_offset, value);
+    ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value);
 }
 
 void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
@@ -67,7 +67,7 @@ void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view p
 
 void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                             std::string_view value) {
-    ctx.AddU32("{}=atomicMax(smem[{}/4],{});", inst, pointer_offset, value);
+    ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value);
 }
 
 void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
@@ -82,31 +82,31 @@ void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view po
 
 void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                            std::string_view value) {
-    ctx.AddU32("{}=atomicAnd(smem[{}/4],{});", inst, pointer_offset, value);
+    ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value);
 }
 
 void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                           std::string_view value) {
-    ctx.AddU32("{}=atomicOr(smem[{}/4],{});", inst, pointer_offset, value);
+    ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value);
 }
 
 void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                            std::string_view value) {
-    ctx.AddU32("{}=atomicXor(smem[{}/4],{});", inst, pointer_offset, value);
+    ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value);
 }
 
 void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                                 std::string_view value) {
-    ctx.AddU32("{}=atomicExchange(smem[{}/4],{});", inst, pointer_offset, value);
+    ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value);
 }
 
 void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                                 std::string_view value) {
     // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic");
-    ctx.AddU64("{}=packUint2x32(uvec2(smem[{}/4],smem[({}+4)/4]));", inst, pointer_offset,
+    ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset,
                pointer_offset);
-    ctx.Add("smem[{}/4]=unpackUint2x32({}).x;smem[({}+4)/4]=unpackUint2x32({}).y;", pointer_offset,
-            value, pointer_offset, value);
+    ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;",
+            pointer_offset, value, pointer_offset, value);
 }
 
 void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
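Every shared-memory atomic indexes the uint array smem with a byte offset scaled down to a 32-bit word index; this file only swaps the division for a shift. A sketch of the emitted GLSL, assuming hypothetical names off, val and res for the offset, value and result temporaries:

    // before: res=atomicAdd(smem[off/4],val);
    res=atomicAdd(smem[off>>2],val);

For the unsigned offsets the backend produces, off/4 and off>>2 select the same word.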
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 8f5f94752..8d2abdd94 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -31,7 +31,7 @@ void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst&
     } else {
         const auto offset_var{ctx.var_alloc.Consume(offset)};
         ctx.AddU32(
-            "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);",
+            "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);",
             inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
     }
 }
@@ -46,8 +46,8 @@ void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst&
     } else {
         const auto offset_var{ctx.var_alloc.Consume(offset)};
         ctx.AddU32(
-            "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", inst,
-            ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
+            "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);",
+            inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
     }
 }
 
@@ -60,7 +60,7 @@ void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst
                    ((offset.U32() / 2) % 2) * 16);
     } else {
         const auto offset_var{ctx.var_alloc.Consume(offset)};
-        ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int((({}/"
+        ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/"
                    "2)%2)*16),16);",
                    inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
     }
@@ -75,9 +75,9 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst
                    ((offset.U32() / 2) % 2) * 16);
     } else {
         const auto offset_var{ctx.var_alloc.Consume(offset)};
-        ctx.AddU32(
-            "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int((({}/2)%2)*16),16);",
+        ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/"
+                   "2)%2)*16),16);",
             inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
     }
 }
 
@@ -88,7 +88,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                    offset.U32() / 16, OffsetSwizzle(offset.U32()));
     } else {
         const auto offset_var{ctx.var_alloc.Consume(offset)};
-        ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]);", inst, ctx.stage_name,
+        ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]);", inst, ctx.stage_name,
                    binding.U32(), offset_var, offset_var);
     }
 }
@@ -100,7 +100,7 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                    OffsetSwizzle(offset.U32()));
     } else {
         const auto offset_var{ctx.var_alloc.Consume(offset)};
-        ctx.AddF32("{}={}_cbuf{}[{}/16][({}/4)%4];", inst, ctx.stage_name, binding.U32(),
+        ctx.AddF32("{}={}_cbuf{}[{}/16][({}>>2)%4];", inst, ctx.stage_name, binding.U32(),
                    offset_var, offset_var);
     }
 }
@@ -116,7 +116,7 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
     } else {
         const auto offset_var{ctx.var_alloc.Consume(offset)};
         ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/"
-                     "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)/4)%4]));",
+                     "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)>>2)%4]));",
                      inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name,
                      binding.U32(), offset_var, offset_var);
     }
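The constant-buffer getters address cbufs as arrays of vec4, so each read pairs a vec4 index with a component index; only the component selector changes here, while the /16 vec4 index keeps its division. A sketch of what EmitGetCbufU32 now emits for a dynamic offset, with hypothetical names (vertex stage, binding 0, offset temporary off):

    // off/16 selects the vec4, (off>>2)%4 selects the component within it
    res=floatBitsToUint(vs_cbuf0[off/16][(off>>2)%4]);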
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
index 78fbb9d6e..a4411b68b 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
@@ -13,7 +13,7 @@ void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
                        [[maybe_unused]] const IR::Value& binding,
                        [[maybe_unused]] const IR::Value& offset) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int({}%4)*8,8);", inst, ctx.stage_name,
+    ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name,
                binding.U32(), offset_var, offset_var);
 }
 
@@ -21,7 +21,7 @@ void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
                        [[maybe_unused]] const IR::Value& binding,
                        [[maybe_unused]] const IR::Value& offset) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int({}%4)*8,8);", inst, ctx.stage_name,
+    ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name,
                binding.U32(), offset_var, offset_var);
 }
 
@@ -29,7 +29,7 @@ void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
                         [[maybe_unused]] const IR::Value& binding,
                         [[maybe_unused]] const IR::Value& offset) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int(({}/2)%2)*16,16);", inst, ctx.stage_name,
+    ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name,
                binding.U32(), offset_var, offset_var);
 }
 
@@ -37,30 +37,31 @@ void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
                         [[maybe_unused]] const IR::Value& binding,
                         [[maybe_unused]] const IR::Value& offset) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int(({}/2)%2)*16,16);", inst,
+    ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst,
                ctx.stage_name, binding.U32(), offset_var, offset_var);
 }
 
 void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                        const IR::Value& offset) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.AddU32("{}={}_ssbo{}[{}/4];", inst, ctx.stage_name, binding.U32(), offset_var);
+    ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var);
 }
 
 void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                        const IR::Value& offset) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4]);", inst, ctx.stage_name,
+    ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name,
                  binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var);
 }
 
 void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                         const IR::Value& offset) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.AddU32x4(
-        "{}=uvec4({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4],{}_ssbo{}[({}+8)/4],{}_ssbo{}[({}+12)/4]);",
-        inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var,
-        ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var);
+    ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}"
+                 "+12)>>2]);",
+                 inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(),
+                 offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name,
+                 binding.U32(), offset_var);
 }
 
 void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx,
@@ -68,7 +69,7 @@ void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx,
                         [[maybe_unused]] const IR::Value& offset,
                         [[maybe_unused]] std::string_view value) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name,
+    ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name,
            binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value,
           offset_var);
 }
@@ -78,7 +79,7 @@ void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx,
                         [[maybe_unused]] const IR::Value& offset,
                         [[maybe_unused]] std::string_view value) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name,
+    ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name,
           binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value,
          offset_var);
 }
@@ -88,7 +89,7 @@ void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx,
                          [[maybe_unused]] const IR::Value& offset,
                          [[maybe_unused]] std::string_view value) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);",
+    ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);",
             ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var,
             value, offset_var);
 }
@@ -98,7 +99,7 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx,
                          [[maybe_unused]] const IR::Value& offset,
                          [[maybe_unused]] std::string_view value) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);",
+    ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);",
             ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var,
             value, offset_var);
 }
@@ -106,14 +107,14 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx,
 void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         std::string_view value) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.Add("{}_ssbo{}[{}/4]={};", ctx.stage_name, binding.U32(), offset_var, value);
+    ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value);
 }
 
 void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         std::string_view value) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
-    ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
+    ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
+    ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
 }
 
 void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx,
@@ -121,9 +122,9 @@ void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx,
                          [[maybe_unused]] const IR::Value& offset,
                          [[maybe_unused]] std::string_view value) {
     const auto offset_var{ctx.var_alloc.Consume(offset)};
-    ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
-    ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
-    ctx.Add("{}_ssbo{}[({}+8)/4]={}.z;", ctx.stage_name, binding.U32(), offset_var, value);
-    ctx.Add("{}_ssbo{}[({}+12)/4]={}.w;", ctx.stage_name, binding.U32(), offset_var, value);
+    ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
+    ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
+    ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value);
+    ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value);
 }
 } // namespace Shader::Backend::GLSL
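The SSBO paths make the same substitution on the {}_ssbo{} uint arrays, and the 16-bit variants also replace the half-word selector ({}/2)%2 with ({}>>1)%2. A sketch of what EmitLoadStorageU16 now emits, again with hypothetical names (vertex stage, binding 0, offset temporary off):

    // off>>2 picks the 32-bit word, (off>>1)%2 picks its 16-bit half, *16 makes a bit offset
    res=bitfieldExtract(vs_ssbo0[off>>2],int((off>>1)%2)*16,16);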
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
index 8a4c69547..578bc349f 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
@@ -11,70 +11,70 @@
 namespace Shader::Backend::GLSL {
 void EmitLoadSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
                       [[maybe_unused]] std::string_view offset) {
-    ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int({}%4)*8,8);", inst, offset, offset);
+    ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset);
 }
 
 void EmitLoadSharedS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
                       [[maybe_unused]] std::string_view offset) {
-    ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int({}%4)*8,8);", inst, offset, offset);
+    ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset);
 }
 
 void EmitLoadSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
                        [[maybe_unused]] std::string_view offset) {
-    ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int(({}/2)%2)*16,16);", inst, offset, offset);
+    ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset);
 }
 
 void EmitLoadSharedS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
                        [[maybe_unused]] std::string_view offset) {
-    ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int(({}/2)%2)*16,16);", inst, offset, offset);
+    ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset);
 }
 
 void EmitLoadSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
                        [[maybe_unused]] std::string_view offset) {
-    ctx.AddU32("{}=smem[{}/4];", inst, offset);
+    ctx.AddU32("{}=smem[{}>>2];", inst, offset);
 }
 
 void EmitLoadSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
                        [[maybe_unused]] std::string_view offset) {
-    ctx.AddU32x2("{}=uvec2(smem[{}/4],smem[({}+4)/4]);", inst, offset, offset);
+    ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset);
 }
 
 void EmitLoadSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
                         [[maybe_unused]] std::string_view offset) {
-    ctx.AddU32x4("{}=uvec4(smem[{}/4],smem[({}+4)/4],smem[({}+8)/4],smem[({}+12)/4]);", inst,
+    ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst,
                  offset, offset, offset, offset);
 }
 
 void EmitWriteSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
                        [[maybe_unused]] std::string_view value) {
-    ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int({}%4)*8,8);", offset, offset, value,
+    ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int({}%4)*8,8);", offset, offset, value,
            offset);
 }
 
 void EmitWriteSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
                         [[maybe_unused]] std::string_view value) {
-    ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int(({}/2)%2)*16,16);", offset, offset, value,
-            offset);
+    ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int(({}>>1)%2)*16,16);", offset, offset,
+            value, offset);
 }
 
 void EmitWriteSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
                         [[maybe_unused]] std::string_view value) {
-    ctx.Add("smem[{}/4]={};", offset, value);
+    ctx.Add("smem[{}>>2]={};", offset, value);
 }
 
 void EmitWriteSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
                         [[maybe_unused]] std::string_view value) {
-    ctx.Add("smem[{}/4]={}.x;", offset, value);
-    ctx.Add("smem[({}+4)/4]={}.y;", offset, value);
+    ctx.Add("smem[{}>>2]={}.x;", offset, value);
+    ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
 }
 
 void EmitWriteSharedU128([[maybe_unused]] EmitContext& ctx,
                          [[maybe_unused]] std::string_view offset,
                          [[maybe_unused]] std::string_view value) {
-    ctx.Add("smem[{}/4]={}.x;", offset, value);
-    ctx.Add("smem[({}+4)/4]={}.y;", offset, value);
-    ctx.Add("smem[({}+8)/4]={}.z;", offset, value);
-    ctx.Add("smem[({}+12)/4]={}.w;", offset, value);
+    ctx.Add("smem[{}>>2]={}.x;", offset, value);
+    ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
+    ctx.Add("smem[({}+8)>>2]={}.z;", offset, value);
+    ctx.Add("smem[({}+12)>>2]={}.w;", offset, value);
 }
 
 } // namespace Shader::Backend::GLSL
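Wide shared-memory accesses are split into consecutive 32-bit words 4 bytes apart, with each word's byte offset shifted independently. For example, EmitWriteSharedU64 now emits (hypothetical off and val):

    smem[off>>2]=val.x;     // low word
    smem[(off+4)>>2]=val.y; // high word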
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index 4286f29c7..0b6c5ad82 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -112,7 +112,7 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
     const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
     SetInBoundsFlag(ctx, inst);
-    ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id);
+    ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
 }
 
 void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
@@ -122,7 +122,7 @@ void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std
     const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
     ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
     SetInBoundsFlag(ctx, inst);
-    ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id);
+    ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
 }
 
 void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
@@ -133,7 +133,7 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
     const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
     SetInBoundsFlag(ctx, inst);
-    ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id);
+    ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
 }
 
 void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
@@ -144,7 +144,7 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val
     const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
     SetInBoundsFlag(ctx, inst);
-    ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id);
+    ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
 }
 
 void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
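Unlike the offset changes above, the shuffle change is a functional fix: the old ternary shfl_in_bounds?value:src_thread_id never read across invocations, returning the caller's own value when in bounds and the raw source thread id otherwise. The new code fetches value from the computed source invocation with readInvocationARB (GL_ARB_shader_ballot), falling back to the caller's own value when out of bounds. A sketch of what EmitShuffleDown now emits, with hypothetical temporaries tid, idx, max_id, val and res:

    shfl_in_bounds=int((tid+idx))<=int(max_id);
    // read val from lane tid+idx when in bounds, else keep our own val
    res=shfl_in_bounds?readInvocationARB(val,(tid+idx)):val;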