summary | refs | log | tree | commit | diff
path: root/src/shader_recompiler/backend
diff options
context:
space:
mode:
author: ameerj 2021-05-31 00:25:54 -0400
committer: ameerj 2021-07-22 21:51:37 -0400
commit: b7561226edaefc79eadcfbd3df1b0344b7c4b673 (patch)
tree: 49515563855b3585e6d23ceec7274a8e69340615 /src/shader_recompiler/backend
parent: glsl: Implement precise fp variable allocation (diff)
download: yuzu-b7561226edaefc79eadcfbd3df1b0344b7c4b673.tar.gz
yuzu-b7561226edaefc79eadcfbd3df1b0344b7c4b673.tar.xz
yuzu-b7561226edaefc79eadcfbd3df1b0344b7c4b673.zip
glsl: SHFL fix and prefer shift operations over divide in glsl shader
Diffstat (limited to 'src/shader_recompiler/backend')
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp22
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp20
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp43
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp34
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp8
5 files changed, 64 insertions(+), 63 deletions(-)
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
index d3301054c..9714ffe33 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
@@ -20,7 +20,7 @@ static constexpr std::string_view cas_loop{R"(for (;;){{
20void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, 20void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
21 std::string_view value, std::string_view function) { 21 std::string_view value, std::string_view function) {
22 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; 22 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
23 const std::string smem{fmt::format("smem[{}/4]", offset)}; 23 const std::string smem{fmt::format("smem[{}>>2]", offset)};
24 ctx.Add(cas_loop.data(), smem, ret, smem, function, smem, value, ret); 24 ctx.Add(cas_loop.data(), smem, ret, smem, function, smem, value, ret);
25} 25}
26 26
@@ -45,7 +45,7 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi
45 45
46void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, 46void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
47 std::string_view value) { 47 std::string_view value) {
48 ctx.AddU32("{}=atomicAdd(smem[{}/4],{});", inst, pointer_offset, value); 48 ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value);
49} 49}
50 50
51void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, 51void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
@@ -56,7 +56,7 @@ void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view p
56 56
57void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, 57void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
58 std::string_view value) { 58 std::string_view value) {
59 ctx.AddU32("{}=atomicMin(smem[{}/4],{});", inst, pointer_offset, value); 59 ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value);
60} 60}
61 61
62void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, 62void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
@@ -67,7 +67,7 @@ void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view p
67 67
68void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, 68void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
69 std::string_view value) { 69 std::string_view value) {
70 ctx.AddU32("{}=atomicMax(smem[{}/4],{});", inst, pointer_offset, value); 70 ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value);
71} 71}
72 72
73void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, 73void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
@@ -82,31 +82,31 @@ void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view po
82 82
83void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, 83void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
84 std::string_view value) { 84 std::string_view value) {
85 ctx.AddU32("{}=atomicAnd(smem[{}/4],{});", inst, pointer_offset, value); 85 ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value);
86} 86}
87 87
88void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, 88void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
89 std::string_view value) { 89 std::string_view value) {
90 ctx.AddU32("{}=atomicOr(smem[{}/4],{});", inst, pointer_offset, value); 90 ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value);
91} 91}
92 92
93void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, 93void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
94 std::string_view value) { 94 std::string_view value) {
95 ctx.AddU32("{}=atomicXor(smem[{}/4],{});", inst, pointer_offset, value); 95 ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value);
96} 96}
97 97
98void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, 98void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
99 std::string_view value) { 99 std::string_view value) {
100 ctx.AddU32("{}=atomicExchange(smem[{}/4],{});", inst, pointer_offset, value); 100 ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value);
101} 101}
102 102
103void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, 103void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
104 std::string_view value) { 104 std::string_view value) {
105 // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); 105 // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic");
106 ctx.AddU64("{}=packUint2x32(uvec2(smem[{}/4],smem[({}+4)/4]));", inst, pointer_offset, 106 ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset,
107 pointer_offset); 107 pointer_offset);
108 ctx.Add("smem[{}/4]=unpackUint2x32({}).x;smem[({}+4)/4]=unpackUint2x32({}).y;", pointer_offset, 108 ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;",
109 value, pointer_offset, value); 109 pointer_offset, value, pointer_offset, value);
110} 110}
111 111
112void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, 112void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 8f5f94752..8d2abdd94 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -31,7 +31,7 @@ void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst&
31 } else { 31 } else {
32 const auto offset_var{ctx.var_alloc.Consume(offset)}; 32 const auto offset_var{ctx.var_alloc.Consume(offset)};
33 ctx.AddU32( 33 ctx.AddU32(
34 "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", 34 "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);",
35 inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); 35 inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
36 } 36 }
37} 37}
@@ -46,8 +46,8 @@ void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst&
46 } else { 46 } else {
47 const auto offset_var{ctx.var_alloc.Consume(offset)}; 47 const auto offset_var{ctx.var_alloc.Consume(offset)};
48 ctx.AddU32( 48 ctx.AddU32(
49 "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", inst, 49 "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);",
50 ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); 50 inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
51 } 51 }
52} 52}
53 53
@@ -60,7 +60,7 @@ void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst
60 ((offset.U32() / 2) % 2) * 16); 60 ((offset.U32() / 2) % 2) * 16);
61 } else { 61 } else {
62 const auto offset_var{ctx.var_alloc.Consume(offset)}; 62 const auto offset_var{ctx.var_alloc.Consume(offset)};
63 ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int((({}/" 63 ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/"
64 "2)%2)*16),16);", 64 "2)%2)*16),16);",
65 inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); 65 inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
66 } 66 }
@@ -75,9 +75,9 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst
75 ((offset.U32() / 2) % 2) * 16); 75 ((offset.U32() / 2) % 2) * 16);
76 } else { 76 } else {
77 const auto offset_var{ctx.var_alloc.Consume(offset)}; 77 const auto offset_var{ctx.var_alloc.Consume(offset)};
78 ctx.AddU32( 78 ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/"
79 "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int((({}/2)%2)*16),16);", 79 "2)%2)*16),16);",
80 inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); 80 inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
81 } 81 }
82} 82}
83 83
@@ -88,7 +88,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
88 offset.U32() / 16, OffsetSwizzle(offset.U32())); 88 offset.U32() / 16, OffsetSwizzle(offset.U32()));
89 } else { 89 } else {
90 const auto offset_var{ctx.var_alloc.Consume(offset)}; 90 const auto offset_var{ctx.var_alloc.Consume(offset)};
91 ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]);", inst, ctx.stage_name, 91 ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]);", inst, ctx.stage_name,
92 binding.U32(), offset_var, offset_var); 92 binding.U32(), offset_var, offset_var);
93 } 93 }
94} 94}
@@ -100,7 +100,7 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
100 OffsetSwizzle(offset.U32())); 100 OffsetSwizzle(offset.U32()));
101 } else { 101 } else {
102 const auto offset_var{ctx.var_alloc.Consume(offset)}; 102 const auto offset_var{ctx.var_alloc.Consume(offset)};
103 ctx.AddF32("{}={}_cbuf{}[{}/16][({}/4)%4];", inst, ctx.stage_name, binding.U32(), 103 ctx.AddF32("{}={}_cbuf{}[{}/16][({}>>2)%4];", inst, ctx.stage_name, binding.U32(),
104 offset_var, offset_var); 104 offset_var, offset_var);
105 } 105 }
106} 106}
@@ -116,7 +116,7 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
116 } else { 116 } else {
117 const auto offset_var{ctx.var_alloc.Consume(offset)}; 117 const auto offset_var{ctx.var_alloc.Consume(offset)};
118 ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/" 118 ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/"
119 "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)/4)%4]));", 119 "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)>>2)%4]));",
120 inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name, 120 inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name,
121 binding.U32(), offset_var, offset_var); 121 binding.U32(), offset_var, offset_var);
122 } 122 }
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
index 78fbb9d6e..a4411b68b 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
@@ -13,7 +13,7 @@ void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
13 [[maybe_unused]] const IR::Value& binding, 13 [[maybe_unused]] const IR::Value& binding,
14 [[maybe_unused]] const IR::Value& offset) { 14 [[maybe_unused]] const IR::Value& offset) {
15 const auto offset_var{ctx.var_alloc.Consume(offset)}; 15 const auto offset_var{ctx.var_alloc.Consume(offset)};
16 ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int({}%4)*8,8);", inst, ctx.stage_name, 16 ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name,
17 binding.U32(), offset_var, offset_var); 17 binding.U32(), offset_var, offset_var);
18} 18}
19 19
@@ -21,7 +21,7 @@ void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
21 [[maybe_unused]] const IR::Value& binding, 21 [[maybe_unused]] const IR::Value& binding,
22 [[maybe_unused]] const IR::Value& offset) { 22 [[maybe_unused]] const IR::Value& offset) {
23 const auto offset_var{ctx.var_alloc.Consume(offset)}; 23 const auto offset_var{ctx.var_alloc.Consume(offset)};
24 ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int({}%4)*8,8);", inst, ctx.stage_name, 24 ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name,
25 binding.U32(), offset_var, offset_var); 25 binding.U32(), offset_var, offset_var);
26} 26}
27 27
@@ -29,7 +29,7 @@ void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
29 [[maybe_unused]] const IR::Value& binding, 29 [[maybe_unused]] const IR::Value& binding,
30 [[maybe_unused]] const IR::Value& offset) { 30 [[maybe_unused]] const IR::Value& offset) {
31 const auto offset_var{ctx.var_alloc.Consume(offset)}; 31 const auto offset_var{ctx.var_alloc.Consume(offset)};
32 ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int(({}/2)%2)*16,16);", inst, ctx.stage_name, 32 ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name,
33 binding.U32(), offset_var, offset_var); 33 binding.U32(), offset_var, offset_var);
34} 34}
35 35
@@ -37,30 +37,31 @@ void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
37 [[maybe_unused]] const IR::Value& binding, 37 [[maybe_unused]] const IR::Value& binding,
38 [[maybe_unused]] const IR::Value& offset) { 38 [[maybe_unused]] const IR::Value& offset) {
39 const auto offset_var{ctx.var_alloc.Consume(offset)}; 39 const auto offset_var{ctx.var_alloc.Consume(offset)};
40 ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int(({}/2)%2)*16,16);", inst, 40 ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst,
41 ctx.stage_name, binding.U32(), offset_var, offset_var); 41 ctx.stage_name, binding.U32(), offset_var, offset_var);
42} 42}
43 43
44void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, 44void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
45 const IR::Value& offset) { 45 const IR::Value& offset) {
46 const auto offset_var{ctx.var_alloc.Consume(offset)}; 46 const auto offset_var{ctx.var_alloc.Consume(offset)};
47 ctx.AddU32("{}={}_ssbo{}[{}/4];", inst, ctx.stage_name, binding.U32(), offset_var); 47 ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var);
48} 48}
49 49
50void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, 50void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
51 const IR::Value& offset) { 51 const IR::Value& offset) {
52 const auto offset_var{ctx.var_alloc.Consume(offset)}; 52 const auto offset_var{ctx.var_alloc.Consume(offset)};
53 ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4]);", inst, ctx.stage_name, 53 ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name,
54 binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); 54 binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var);
55} 55}
56 56
57void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, 57void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
58 const IR::Value& offset) { 58 const IR::Value& offset) {
59 const auto offset_var{ctx.var_alloc.Consume(offset)}; 59 const auto offset_var{ctx.var_alloc.Consume(offset)};
60 ctx.AddU32x4( 60 ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}"
61 "{}=uvec4({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4],{}_ssbo{}[({}+8)/4],{}_ssbo{}[({}+12)/4]);", 61 "+12)>>2]);",
62 inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, 62 inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(),
63 ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); 63 offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name,
64 binding.U32(), offset_var);
64} 65}
65 66
66void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, 67void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx,
@@ -68,7 +69,7 @@ void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx,
68 [[maybe_unused]] const IR::Value& offset, 69 [[maybe_unused]] const IR::Value& offset,
69 [[maybe_unused]] std::string_view value) { 70 [[maybe_unused]] std::string_view value) {
70 const auto offset_var{ctx.var_alloc.Consume(offset)}; 71 const auto offset_var{ctx.var_alloc.Consume(offset)};
71 ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name, 72 ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name,
72 binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, 73 binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value,
73 offset_var); 74 offset_var);
74} 75}
@@ -78,7 +79,7 @@ void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx,
78 [[maybe_unused]] const IR::Value& offset, 79 [[maybe_unused]] const IR::Value& offset,
79 [[maybe_unused]] std::string_view value) { 80 [[maybe_unused]] std::string_view value) {
80 const auto offset_var{ctx.var_alloc.Consume(offset)}; 81 const auto offset_var{ctx.var_alloc.Consume(offset)};
81 ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name, 82 ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name,
82 binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, 83 binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value,
83 offset_var); 84 offset_var);
84} 85}
@@ -88,7 +89,7 @@ void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx,
88 [[maybe_unused]] const IR::Value& offset, 89 [[maybe_unused]] const IR::Value& offset,
89 [[maybe_unused]] std::string_view value) { 90 [[maybe_unused]] std::string_view value) {
90 const auto offset_var{ctx.var_alloc.Consume(offset)}; 91 const auto offset_var{ctx.var_alloc.Consume(offset)};
91 ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", 92 ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);",
92 ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, 93 ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var,
93 value, offset_var); 94 value, offset_var);
94} 95}
@@ -98,7 +99,7 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx,
98 [[maybe_unused]] const IR::Value& offset, 99 [[maybe_unused]] const IR::Value& offset,
99 [[maybe_unused]] std::string_view value) { 100 [[maybe_unused]] std::string_view value) {
100 const auto offset_var{ctx.var_alloc.Consume(offset)}; 101 const auto offset_var{ctx.var_alloc.Consume(offset)};
101 ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", 102 ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);",
102 ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, 103 ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var,
103 value, offset_var); 104 value, offset_var);
104} 105}
@@ -106,14 +107,14 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx,
106void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, 107void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
107 std::string_view value) { 108 std::string_view value) {
108 const auto offset_var{ctx.var_alloc.Consume(offset)}; 109 const auto offset_var{ctx.var_alloc.Consume(offset)};
109 ctx.Add("{}_ssbo{}[{}/4]={};", ctx.stage_name, binding.U32(), offset_var, value); 110 ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value);
110} 111}
111 112
112void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, 113void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
113 std::string_view value) { 114 std::string_view value) {
114 const auto offset_var{ctx.var_alloc.Consume(offset)}; 115 const auto offset_var{ctx.var_alloc.Consume(offset)};
115 ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); 116 ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
116 ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); 117 ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
117} 118}
118 119
119void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, 120void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx,
@@ -121,9 +122,9 @@ void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx,
121 [[maybe_unused]] const IR::Value& offset, 122 [[maybe_unused]] const IR::Value& offset,
122 [[maybe_unused]] std::string_view value) { 123 [[maybe_unused]] std::string_view value) {
123 const auto offset_var{ctx.var_alloc.Consume(offset)}; 124 const auto offset_var{ctx.var_alloc.Consume(offset)};
124 ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); 125 ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
125 ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); 126 ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
126 ctx.Add("{}_ssbo{}[({}+8)/4]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); 127 ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value);
127 ctx.Add("{}_ssbo{}[({}+12)/4]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); 128 ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value);
128} 129}
129} // namespace Shader::Backend::GLSL 130} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
index 8a4c69547..578bc349f 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
@@ -11,70 +11,70 @@
11namespace Shader::Backend::GLSL { 11namespace Shader::Backend::GLSL {
12void EmitLoadSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 12void EmitLoadSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
13 [[maybe_unused]] std::string_view offset) { 13 [[maybe_unused]] std::string_view offset) {
14 ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int({}%4)*8,8);", inst, offset, offset); 14 ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset);
15} 15}
16 16
17void EmitLoadSharedS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 17void EmitLoadSharedS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
18 [[maybe_unused]] std::string_view offset) { 18 [[maybe_unused]] std::string_view offset) {
19 ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int({}%4)*8,8);", inst, offset, offset); 19 ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset);
20} 20}
21 21
22void EmitLoadSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 22void EmitLoadSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
23 [[maybe_unused]] std::string_view offset) { 23 [[maybe_unused]] std::string_view offset) {
24 ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int(({}/2)%2)*16,16);", inst, offset, offset); 24 ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset);
25} 25}
26 26
27void EmitLoadSharedS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 27void EmitLoadSharedS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
28 [[maybe_unused]] std::string_view offset) { 28 [[maybe_unused]] std::string_view offset) {
29 ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int(({}/2)%2)*16,16);", inst, offset, offset); 29 ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset);
30} 30}
31 31
32void EmitLoadSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 32void EmitLoadSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
33 [[maybe_unused]] std::string_view offset) { 33 [[maybe_unused]] std::string_view offset) {
34 ctx.AddU32("{}=smem[{}/4];", inst, offset); 34 ctx.AddU32("{}=smem[{}>>2];", inst, offset);
35} 35}
36 36
37void EmitLoadSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 37void EmitLoadSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
38 [[maybe_unused]] std::string_view offset) { 38 [[maybe_unused]] std::string_view offset) {
39 ctx.AddU32x2("{}=uvec2(smem[{}/4],smem[({}+4)/4]);", inst, offset, offset); 39 ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset);
40} 40}
41 41
42void EmitLoadSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 42void EmitLoadSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
43 [[maybe_unused]] std::string_view offset) { 43 [[maybe_unused]] std::string_view offset) {
44 ctx.AddU32x4("{}=uvec4(smem[{}/4],smem[({}+4)/4],smem[({}+8)/4],smem[({}+12)/4]);", inst, 44 ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst,
45 offset, offset, offset, offset); 45 offset, offset, offset, offset);
46} 46}
47 47
48void EmitWriteSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, 48void EmitWriteSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
49 [[maybe_unused]] std::string_view value) { 49 [[maybe_unused]] std::string_view value) {
50 ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int({}%4)*8,8);", offset, offset, value, 50 ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int({}%4)*8,8);", offset, offset, value,
51 offset); 51 offset);
52} 52}
53 53
54void EmitWriteSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, 54void EmitWriteSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
55 [[maybe_unused]] std::string_view value) { 55 [[maybe_unused]] std::string_view value) {
56 ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int(({}/2)%2)*16,16);", offset, offset, value, 56 ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int(({}>>1)%2)*16,16);", offset, offset,
57 offset); 57 value, offset);
58} 58}
59 59
60void EmitWriteSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, 60void EmitWriteSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
61 [[maybe_unused]] std::string_view value) { 61 [[maybe_unused]] std::string_view value) {
62 ctx.Add("smem[{}/4]={};", offset, value); 62 ctx.Add("smem[{}>>2]={};", offset, value);
63} 63}
64 64
65void EmitWriteSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, 65void EmitWriteSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
66 [[maybe_unused]] std::string_view value) { 66 [[maybe_unused]] std::string_view value) {
67 ctx.Add("smem[{}/4]={}.x;", offset, value); 67 ctx.Add("smem[{}>>2]={}.x;", offset, value);
68 ctx.Add("smem[({}+4)/4]={}.y;", offset, value); 68 ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
69} 69}
70 70
71void EmitWriteSharedU128([[maybe_unused]] EmitContext& ctx, 71void EmitWriteSharedU128([[maybe_unused]] EmitContext& ctx,
72 [[maybe_unused]] std::string_view offset, 72 [[maybe_unused]] std::string_view offset,
73 [[maybe_unused]] std::string_view value) { 73 [[maybe_unused]] std::string_view value) {
74 ctx.Add("smem[{}/4]={}.x;", offset, value); 74 ctx.Add("smem[{}>>2]={}.x;", offset, value);
75 ctx.Add("smem[({}+4)/4]={}.y;", offset, value); 75 ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
76 ctx.Add("smem[({}+8)/4]={}.z;", offset, value); 76 ctx.Add("smem[({}+8)>>2]={}.z;", offset, value);
77 ctx.Add("smem[({}+12)/4]={}.w;", offset, value); 77 ctx.Add("smem[({}+12)>>2]={}.w;", offset, value);
78} 78}
79 79
80} // namespace Shader::Backend::GLSL 80} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index 4286f29c7..0b6c5ad82 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -112,7 +112,7 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
112 const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; 112 const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
113 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); 113 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
114 SetInBoundsFlag(ctx, inst); 114 SetInBoundsFlag(ctx, inst);
115 ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); 115 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
116} 116}
117 117
118void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, 118void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
@@ -122,7 +122,7 @@ void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std
122 const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; 122 const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
123 ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); 123 ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
124 SetInBoundsFlag(ctx, inst); 124 SetInBoundsFlag(ctx, inst);
125 ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); 125 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
126} 126}
127 127
128void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, 128void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
@@ -133,7 +133,7 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
133 const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; 133 const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
134 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); 134 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
135 SetInBoundsFlag(ctx, inst); 135 SetInBoundsFlag(ctx, inst);
136 ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); 136 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
137} 137}
138 138
139void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, 139void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
@@ -144,7 +144,7 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val
144 const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; 144 const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
145 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); 145 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
146 SetInBoundsFlag(ctx, inst); 146 SetInBoundsFlag(ctx, inst);
147 ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); 147 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
148} 148}
149 149
150void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 150void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,