diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | 161 | ||||
| -rw-r--r-- | src/shader_recompiler/profile.h | 2 |
2 files changed, 112 insertions, 51 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index b2caa222a..83ce6fcbb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | |||
| @@ -44,95 +44,154 @@ std::string OutputVertexIndex(EmitContext& ctx, std::string_view vertex) { | |||
| 44 | 44 | ||
| 45 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 45 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 46 | const IR::Value& offset) { | 46 | const IR::Value& offset) { |
| 47 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||
| 47 | if (offset.IsImmediate()) { | 48 | if (offset.IsImmediate()) { |
| 48 | ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}].{}),int({}),8);", inst, ctx.stage_name, | 49 | ctx.AddU32("{}=bitfieldExtract(ftou({}[{}].{}),int({}),8);", inst, cbuf, offset.U32() / 16, |
| 49 | binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), | 50 | OffsetSwizzle(offset.U32()), (offset.U32() % 4) * 8); |
| 50 | (offset.U32() % 4) * 8); | 51 | return; |
| 51 | } else { | 52 | } |
| 52 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | 53 | const auto offset_var{ctx.var_alloc.Consume(offset)}; |
| 53 | ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", inst, | 54 | if (!ctx.profile.has_gl_component_indexing_bug) { |
| 54 | ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); | 55 | ctx.AddU32("{}=bitfieldExtract(ftou({}[{}>>4][({}>>2)%4]),int(({}%4)*8),8);", inst, cbuf, |
| 56 | offset_var, offset_var, offset_var); | ||
| 57 | return; | ||
| 58 | } | ||
| 59 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 60 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||
| 61 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 62 | ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftou({}[{}>>4].{}),int(({}%4)*8),8);", | ||
| 63 | cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); | ||
| 55 | } | 64 | } |
| 56 | } | 65 | } |
| 57 | 66 | ||
| 58 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 67 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 59 | const IR::Value& offset) { | 68 | const IR::Value& offset) { |
| 69 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||
| 60 | if (offset.IsImmediate()) { | 70 | if (offset.IsImmediate()) { |
| 61 | ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}].{}),int({}),8);", inst, ctx.stage_name, | 71 | ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}].{}),int({}),8);", inst, cbuf, offset.U32() / 16, |
| 62 | binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), | 72 | OffsetSwizzle(offset.U32()), (offset.U32() % 4) * 8); |
| 63 | (offset.U32() % 4) * 8); | 73 | return; |
| 64 | } else { | 74 | } |
| 65 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | 75 | const auto offset_var{ctx.var_alloc.Consume(offset)}; |
| 66 | ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", inst, | 76 | if (!ctx.profile.has_gl_component_indexing_bug) { |
| 67 | ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); | 77 | ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}>>4][({}>>2)%4]),int(({}%4)*8),8);", inst, cbuf, |
| 78 | offset_var, offset_var, offset_var); | ||
| 79 | return; | ||
| 80 | } | ||
| 81 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 82 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||
| 83 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 84 | ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftoi({}[{}>>4].{}),int(({}%4)*8),8);", | ||
| 85 | cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); | ||
| 68 | } | 86 | } |
| 69 | } | 87 | } |
| 70 | 88 | ||
| 71 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 89 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 72 | const IR::Value& offset) { | 90 | const IR::Value& offset) { |
| 91 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||
| 73 | if (offset.IsImmediate()) { | 92 | if (offset.IsImmediate()) { |
| 74 | ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}].{}),int({}),16);", inst, ctx.stage_name, | 93 | ctx.AddU32("{}=bitfieldExtract(ftou({}[{}].{}),int({}),16);", inst, cbuf, offset.U32() / 16, |
| 75 | binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), | 94 | OffsetSwizzle(offset.U32()), ((offset.U32() / 2) % 2) * 16); |
| 76 | ((offset.U32() / 2) % 2) * 16); | 95 | return; |
| 77 | } else { | 96 | } |
| 78 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | 97 | const auto offset_var{ctx.var_alloc.Consume(offset)}; |
| 79 | ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" | 98 | if (!ctx.profile.has_gl_component_indexing_bug) { |
| 80 | "2)%2)*16),16);", | 99 | ctx.AddU32("{}=bitfieldExtract(ftou({}[{}>>4][({}>>2)%4]),int((({}>>1)%2)*16),16);", inst, |
| 81 | inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); | 100 | cbuf, offset_var, offset_var, offset_var); |
| 101 | return; | ||
| 102 | } | ||
| 103 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 104 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||
| 105 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 106 | ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftou({}[{}>>4].{}),int((({}>>1)%2)*16),16);", | ||
| 107 | cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); | ||
| 82 | } | 108 | } |
| 83 | } | 109 | } |
| 84 | 110 | ||
| 85 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 111 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 86 | const IR::Value& offset) { | 112 | const IR::Value& offset) { |
| 113 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||
| 87 | if (offset.IsImmediate()) { | 114 | if (offset.IsImmediate()) { |
| 88 | ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}].{}),int({}),16);", inst, ctx.stage_name, | 115 | ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}].{}),int({}),16);", inst, cbuf, offset.U32() / 16, |
| 89 | binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), | 116 | OffsetSwizzle(offset.U32()), ((offset.U32() / 2) % 2) * 16); |
| 90 | ((offset.U32() / 2) % 2) * 16); | 117 | return; |
| 91 | } else { | 118 | } |
| 92 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | 119 | const auto offset_var{ctx.var_alloc.Consume(offset)}; |
| 93 | ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" | 120 | if (!ctx.profile.has_gl_component_indexing_bug) { |
| 94 | "2)%2)*16),16);", | 121 | ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}>>4][({}>>2)%4]),int((({}>>1)%2)*16),16);", inst, |
| 95 | inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); | 122 | cbuf, offset_var, offset_var, offset_var); |
| 123 | return; | ||
| 124 | } | ||
| 125 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 126 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||
| 127 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 128 | ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftoi({}[{}>>4].{}),int((({}>>1)%2)*16),16);", | ||
| 129 | cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); | ||
| 96 | } | 130 | } |
| 97 | } | 131 | } |
| 98 | 132 | ||
| 99 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 133 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 100 | const IR::Value& offset) { | 134 | const IR::Value& offset) { |
| 135 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||
| 101 | if (offset.IsImmediate()) { | 136 | if (offset.IsImmediate()) { |
| 102 | ctx.AddU32("{}=ftou({}_cbuf{}[{}].{});", inst, ctx.stage_name, binding.U32(), | 137 | ctx.AddU32("{}=ftou({}[{}].{});", inst, cbuf, offset.U32() / 16, |
| 103 | offset.U32() / 16, OffsetSwizzle(offset.U32())); | 138 | OffsetSwizzle(offset.U32())); |
| 104 | } else { | 139 | return; |
| 105 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | 140 | } |
| 106 | ctx.AddU32("{}=ftou({}_cbuf{}[{}/16][({}>>2)%4]);", inst, ctx.stage_name, binding.U32(), | 141 | const auto offset_var{ctx.var_alloc.Consume(offset)}; |
| 107 | offset_var, offset_var); | 142 | if (!ctx.profile.has_gl_component_indexing_bug) { |
| 143 | ctx.AddU32("{}=ftou({}[{}>>4][({}>>2)%4]);", inst, cbuf, offset_var, offset_var); | ||
| 144 | return; | ||
| 145 | } | ||
| 146 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 147 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||
| 148 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 149 | ctx.Add("if(({}&3)=={}){}=ftou({}[{}>>4].{});", cbuf_offset, swizzle, ret, cbuf, offset_var, | ||
| 150 | "xyzw"[swizzle]); | ||
| 108 | } | 151 | } |
| 109 | } | 152 | } |
| 110 | 153 | ||
| 111 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 154 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 112 | const IR::Value& offset) { | 155 | const IR::Value& offset) { |
| 156 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||
| 113 | if (offset.IsImmediate()) { | 157 | if (offset.IsImmediate()) { |
| 114 | ctx.AddF32("{}={}_cbuf{}[{}].{};", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, | 158 | ctx.AddF32("{}={}[{}].{};", inst, cbuf, offset.U32() / 16, OffsetSwizzle(offset.U32())); |
| 115 | OffsetSwizzle(offset.U32())); | 159 | return; |
| 116 | } else { | 160 | } |
| 117 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | 161 | const auto offset_var{ctx.var_alloc.Consume(offset)}; |
| 118 | ctx.AddF32("{}={}_cbuf{}[{}/16][({}>>2)%4];", inst, ctx.stage_name, binding.U32(), | 162 | if (!ctx.profile.has_gl_component_indexing_bug) { |
| 119 | offset_var, offset_var); | 163 | ctx.AddF32("{}={}[{}>>4][({}>>2)%4];", inst, cbuf, offset_var, offset_var); |
| 164 | return; | ||
| 165 | } | ||
| 166 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; | ||
| 167 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||
| 168 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 169 | ctx.Add("if(({}&3)=={}){}={}[{}>>4].{};", cbuf_offset, swizzle, ret, cbuf, offset_var, | ||
| 170 | "xyzw"[swizzle]); | ||
| 120 | } | 171 | } |
| 121 | } | 172 | } |
| 122 | 173 | ||
| 123 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 174 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 124 | const IR::Value& offset) { | 175 | const IR::Value& offset) { |
| 176 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||
| 125 | if (offset.IsImmediate()) { | 177 | if (offset.IsImmediate()) { |
| 126 | ctx.AddU32x2("{}=uvec2(ftou({}_cbuf{}[{}].{}),ftou({}_cbuf{}[{}].{}));", inst, | 178 | ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, offset.U32() / 16, |
| 127 | ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), | 179 | OffsetSwizzle(offset.U32()), cbuf, (offset.U32() + 4) / 16, |
| 128 | ctx.stage_name, binding.U32(), (offset.U32() + 4) / 16, | ||
| 129 | OffsetSwizzle(offset.U32() + 4)); | 180 | OffsetSwizzle(offset.U32() + 4)); |
| 130 | } else { | 181 | return; |
| 131 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | 182 | } |
| 132 | ctx.AddU32x2("{}=uvec2(ftou({}_cbuf{}[{}/16][({}/" | 183 | const auto offset_var{ctx.var_alloc.Consume(offset)}; |
| 133 | "4)%4]),ftou({}_cbuf{}[({}+4)/16][(({}+4)>>2)%4]));", | 184 | if (!ctx.profile.has_gl_component_indexing_bug) { |
| 134 | inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name, | 185 | ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));", |
| 135 | binding.U32(), offset_var, offset_var); | 186 | inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var); |
| 187 | return; | ||
| 188 | } | ||
| 189 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; | ||
| 190 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||
| 191 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 192 | ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset, | ||
| 193 | swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var, | ||
| 194 | "xyzw"[(swizzle + 1) % 4]); | ||
| 136 | } | 195 | } |
| 137 | } | 196 | } |
| 138 | 197 | ||
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3bbd5a531..bc61a911f 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -101,6 +101,8 @@ struct Profile { | |||
| 101 | bool has_broken_unsigned_image_offsets{}; | 101 | bool has_broken_unsigned_image_offsets{}; |
| 102 | /// Signed instructions with unsigned data types are misinterpreted | 102 | /// Signed instructions with unsigned data types are misinterpreted |
| 103 | bool has_broken_signed_operations{}; | 103 | bool has_broken_signed_operations{}; |
| 104 | /// Dynamic vec4 indexing is broken on some OpenGL drivers | ||
| 105 | bool has_gl_component_indexing_bug{}; | ||
| 104 | /// Ignores SPIR-V ordered vs unordered using GLSL semantics | 106 | /// Ignores SPIR-V ordered vs unordered using GLSL semantics |
| 105 | bool ignore_nan_fp_comparisons{}; | 107 | bool ignore_nan_fp_comparisons{}; |
| 106 | }; | 108 | }; |