summary refs log tree commit diff
path: root/src/shader_recompiler/backend
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler/backend')
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_image.cpp        | 12
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_instructions.h   |  2
-rw-r--r-- src/shader_recompiler/backend/glsl/glsl_emit_context.cpp      |  4
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp      | 76
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_instructions.h |  2
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp     | 40
-rw-r--r-- src/shader_recompiler/backend/spirv/spirv_emit_context.cpp    | 53
-rw-r--r-- src/shader_recompiler/backend/spirv/spirv_emit_context.h      |  3
8 files changed, 151 insertions, 41 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index 6e940bd5a..ad39f44c3 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -449,7 +449,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
449} 449}
450 450
451void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, 451void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
452 std::string_view coords, std::string_view offset, std::string_view lod, 452 std::string_view coords, const IR::Value& offset, std::string_view lod,
453 std::string_view ms) { 453 std::string_view ms) {
454 const auto info{inst.Flags<IR::TextureInstInfo>()}; 454 const auto info{inst.Flags<IR::TextureInstInfo>()};
455 if (info.has_bias) { 455 if (info.has_bias) {
@@ -470,9 +470,9 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
470 const auto int_coords{CoordsCastToInt(coords, info)}; 470 const auto int_coords{CoordsCastToInt(coords, info)};
471 if (!ms.empty()) { 471 if (!ms.empty()) {
472 ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms); 472 ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms);
473 } else if (!offset.empty()) { 473 } else if (!offset.IsEmpty()) {
474 ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod, 474 ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod,
475 CoordsCastToInt(offset, info)); 475 GetOffsetVec(ctx, offset));
476 } else { 476 } else {
477 if (info.type == TextureType::Buffer) { 477 if (info.type == TextureType::Buffer) {
478 ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); 478 ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords);
@@ -485,10 +485,10 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
485 if (!ms.empty()) { 485 if (!ms.empty()) {
486 throw NotImplementedException("EmitImageFetch Sparse MSAA samples"); 486 throw NotImplementedException("EmitImageFetch Sparse MSAA samples");
487 } 487 }
488 if (!offset.empty()) { 488 if (!offset.IsEmpty()) {
489 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", 489 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
490 *sparse_inst, texture, CastToIntVec(coords, info), lod, 490 *sparse_inst, texture, CastToIntVec(coords, info), lod, GetOffsetVec(ctx, offset),
491 CastToIntVec(offset, info), texel); 491 texel);
492 } else { 492 } else {
493 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));", 493 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));",
494 *sparse_inst, texture, CastToIntVec(coords, info), lod, texel); 494 *sparse_inst, texture, CastToIntVec(coords, info), lod, texel);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index 8d0a65047..acebaa785 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -651,7 +651,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
651 std::string_view coords, const IR::Value& offset, const IR::Value& offset2, 651 std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
652 std::string_view dref); 652 std::string_view dref);
653void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, 653void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
654 std::string_view coords, std::string_view offset, std::string_view lod, 654 std::string_view coords, const IR::Value& offset, std::string_view lod,
655 std::string_view ms); 655 std::string_view ms);
656void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, 656void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
657 std::string_view lod, const IR::Value& skip_mips); 657 std::string_view lod, const IR::Value& skip_mips);
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index b2ceeefc4..c5ac7b8f2 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -608,8 +608,8 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
608 const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)}; 608 const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)};
609 const auto aligned_addr{fmt::format("uvec2({},{})", aligned_low_addr, addr_xy[1])}; 609 const auto aligned_addr{fmt::format("uvec2({},{})", aligned_low_addr, addr_xy[1])};
610 const auto addr_pack{fmt::format("packUint2x32({})", aligned_addr)}; 610 const auto addr_pack{fmt::format("packUint2x32({})", aligned_addr)};
611 const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; 611 const auto addr_statement{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
612 func += addr_statment; 612 func += addr_statement;
613 613
614 const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])}; 614 const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])};
615 const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)}; 615 const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 800754554..64a4e0e55 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -12,6 +12,11 @@ namespace Shader::Backend::SPIRV {
12namespace { 12namespace {
13class ImageOperands { 13class ImageOperands {
14public: 14public:
15 [[maybe_unused]] static constexpr bool ImageSampleOffsetAllowed = false;
16 [[maybe_unused]] static constexpr bool ImageGatherOffsetAllowed = true;
17 [[maybe_unused]] static constexpr bool ImageFetchOffsetAllowed = false;
18 [[maybe_unused]] static constexpr bool ImageGradientOffsetAllowed = false;
19
15 explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, 20 explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp,
16 Id lod, const IR::Value& offset) { 21 Id lod, const IR::Value& offset) {
17 if (has_bias) { 22 if (has_bias) {
@@ -22,7 +27,7 @@ public:
22 const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; 27 const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
23 Add(spv::ImageOperandsMask::Lod, lod_value); 28 Add(spv::ImageOperandsMask::Lod, lod_value);
24 } 29 }
25 AddOffset(ctx, offset); 30 AddOffset(ctx, offset, ImageSampleOffsetAllowed);
26 if (has_lod_clamp) { 31 if (has_lod_clamp) {
27 const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod}; 32 const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod};
28 Add(spv::ImageOperandsMask::MinLod, lod_clamp); 33 Add(spv::ImageOperandsMask::MinLod, lod_clamp);
@@ -55,20 +60,17 @@ public:
55 Add(spv::ImageOperandsMask::ConstOffsets, offsets); 60 Add(spv::ImageOperandsMask::ConstOffsets, offsets);
56 } 61 }
57 62
58 explicit ImageOperands(Id offset, Id lod, Id ms) { 63 explicit ImageOperands(Id lod, Id ms) {
59 if (Sirit::ValidId(lod)) { 64 if (Sirit::ValidId(lod)) {
60 Add(spv::ImageOperandsMask::Lod, lod); 65 Add(spv::ImageOperandsMask::Lod, lod);
61 } 66 }
62 if (Sirit::ValidId(offset)) {
63 Add(spv::ImageOperandsMask::Offset, offset);
64 }
65 if (Sirit::ValidId(ms)) { 67 if (Sirit::ValidId(ms)) {
66 Add(spv::ImageOperandsMask::Sample, ms); 68 Add(spv::ImageOperandsMask::Sample, ms);
67 } 69 }
68 } 70 }
69 71
70 explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives, 72 explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives,
71 u32 num_derivatives, Id offset, Id lod_clamp) { 73 u32 num_derivatives, const IR::Value& offset, Id lod_clamp) {
72 if (!Sirit::ValidId(derivatives)) { 74 if (!Sirit::ValidId(derivatives)) {
73 throw LogicError("Derivatives must be present"); 75 throw LogicError("Derivatives must be present");
74 } 76 }
@@ -83,16 +85,14 @@ public:
83 const Id derivatives_Y{ctx.OpCompositeConstruct( 85 const Id derivatives_Y{ctx.OpCompositeConstruct(
84 ctx.F32[num_derivatives], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; 86 ctx.F32[num_derivatives], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
85 Add(spv::ImageOperandsMask::Grad, derivatives_X, derivatives_Y); 87 Add(spv::ImageOperandsMask::Grad, derivatives_X, derivatives_Y);
86 if (Sirit::ValidId(offset)) { 88 AddOffset(ctx, offset, ImageGradientOffsetAllowed);
87 Add(spv::ImageOperandsMask::Offset, offset);
88 }
89 if (has_lod_clamp) { 89 if (has_lod_clamp) {
90 Add(spv::ImageOperandsMask::MinLod, lod_clamp); 90 Add(spv::ImageOperandsMask::MinLod, lod_clamp);
91 } 91 }
92 } 92 }
93 93
94 explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives_1, Id derivatives_2, 94 explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives_1, Id derivatives_2,
95 Id offset, Id lod_clamp) { 95 const IR::Value& offset, Id lod_clamp) {
96 if (!Sirit::ValidId(derivatives_1) || !Sirit::ValidId(derivatives_2)) { 96 if (!Sirit::ValidId(derivatives_1) || !Sirit::ValidId(derivatives_2)) {
97 throw LogicError("Derivatives must be present"); 97 throw LogicError("Derivatives must be present");
98 } 98 }
@@ -111,9 +111,7 @@ public:
111 const Id derivatives_id2{ctx.OpCompositeConstruct( 111 const Id derivatives_id2{ctx.OpCompositeConstruct(
112 ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})}; 112 ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})};
113 Add(spv::ImageOperandsMask::Grad, derivatives_id1, derivatives_id2); 113 Add(spv::ImageOperandsMask::Grad, derivatives_id1, derivatives_id2);
114 if (Sirit::ValidId(offset)) { 114 AddOffset(ctx, offset, ImageGradientOffsetAllowed);
115 Add(spv::ImageOperandsMask::Offset, offset);
116 }
117 if (has_lod_clamp) { 115 if (has_lod_clamp) {
118 Add(spv::ImageOperandsMask::MinLod, lod_clamp); 116 Add(spv::ImageOperandsMask::MinLod, lod_clamp);
119 } 117 }
@@ -132,7 +130,7 @@ public:
132 } 130 }
133 131
134private: 132private:
135 void AddOffset(EmitContext& ctx, const IR::Value& offset) { 133 void AddOffset(EmitContext& ctx, const IR::Value& offset, bool runtime_offset_allowed) {
136 if (offset.IsEmpty()) { 134 if (offset.IsEmpty()) {
137 return; 135 return;
138 } 136 }
@@ -165,7 +163,9 @@ private:
165 break; 163 break;
166 } 164 }
167 } 165 }
168 Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); 166 if (runtime_offset_allowed) {
167 Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
168 }
169 } 169 }
170 170
171 void Add(spv::ImageOperandsMask new_mask, Id value) { 171 void Add(spv::ImageOperandsMask new_mask, Id value) {
@@ -311,6 +311,37 @@ Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info,
311 return coords; 311 return coords;
312 } 312 }
313} 313}
314
315void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, Id& coords,
316 Id offset) {
317 if (!Sirit::ValidId(offset)) {
318 return;
319 }
320
321 Id result_type{};
322 switch (info.type) {
323 case TextureType::Buffer:
324 case TextureType::Color1D:
325 case TextureType::ColorArray1D: {
326 result_type = ctx.U32[1];
327 break;
328 }
329 case TextureType::Color2D:
330 case TextureType::Color2DRect:
331 case TextureType::ColorArray2D: {
332 result_type = ctx.U32[2];
333 break;
334 }
335 case TextureType::Color3D: {
336 result_type = ctx.U32[3];
337 break;
338 }
339 case TextureType::ColorCube:
340 case TextureType::ColorArrayCube:
341 return;
342 }
343 coords = ctx.OpIAdd(result_type, coords, offset);
344}
314} // Anonymous namespace 345} // Anonymous namespace
315 346
316Id EmitBindlessImageSampleImplicitLod(EmitContext&) { 347Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
@@ -496,6 +527,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
496Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, 527Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
497 Id lod, Id ms) { 528 Id lod, Id ms) {
498 const auto info{inst->Flags<IR::TextureInstInfo>()}; 529 const auto info{inst->Flags<IR::TextureInstInfo>()};
530 AddOffsetToCoordinates(ctx, info, coords, offset);
499 if (info.type == TextureType::Buffer) { 531 if (info.type == TextureType::Buffer) {
500 lod = Id{}; 532 lod = Id{};
501 } 533 }
@@ -503,7 +535,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
503 // This image is multisampled, lod must be implicit 535 // This image is multisampled, lod must be implicit
504 lod = Id{}; 536 lod = Id{};
505 } 537 }
506 const ImageOperands operands(offset, lod, ms); 538 const ImageOperands operands(lod, ms);
507 return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], 539 return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
508 TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); 540 TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
509} 541}
@@ -548,13 +580,13 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
548} 580}
549 581
550Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, 582Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
551 Id derivatives, Id offset, Id lod_clamp) { 583 Id derivatives, const IR::Value& offset, Id lod_clamp) {
552 const auto info{inst->Flags<IR::TextureInstInfo>()}; 584 const auto info{inst->Flags<IR::TextureInstInfo>()};
553 const auto operands = 585 const auto operands = info.num_derivatives == 3
554 info.num_derivatives == 3 586 ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
555 ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, offset, {}, lod_clamp) 587 ctx.Def(offset), {}, lod_clamp)
556 : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset, 588 : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
557 lod_clamp); 589 info.num_derivatives, offset, lod_clamp);
558 return Emit(&EmitContext::OpImageSparseSampleExplicitLod, 590 return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
559 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], 591 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
560 Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); 592 Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 7d34575c8..5c01b1012 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -543,7 +543,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
543 const IR::Value& skip_mips); 543 const IR::Value& skip_mips);
544Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); 544Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
545Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, 545Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
546 Id derivatives, Id offset, Id lod_clamp); 546 Id derivatives, const IR::Value& offset, Id lod_clamp);
547Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); 547Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
548void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); 548void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
549Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index); 549Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
index 8693801c7..bdcbccfde 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -65,6 +65,14 @@ void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value&
65 WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), 65 WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32),
66 &StorageDefinitions::U32, index_offset); 66 &StorageDefinitions::U32, index_offset);
67} 67}
68
69void WriteStorageByCasLoop(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
70 Id value, Id bit_offset, Id bit_count) {
71 const Id pointer{StoragePointer(ctx, binding, offset, ctx.storage_types.U32, sizeof(u32),
72 &StorageDefinitions::U32)};
73 ctx.OpFunctionCall(ctx.TypeVoid(), ctx.write_storage_cas_loop_func, pointer, value, bit_offset,
74 bit_count);
75}
68} // Anonymous namespace 76} // Anonymous namespace
69 77
70void EmitLoadGlobalU8(EmitContext&) { 78void EmitLoadGlobalU8(EmitContext&) {
@@ -219,26 +227,42 @@ Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Valu
219 227
220void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, 228void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
221 Id value) { 229 Id value) {
222 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, 230 if (ctx.profile.support_int8) {
223 sizeof(u8), &StorageDefinitions::U8); 231 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8,
232 sizeof(u8), &StorageDefinitions::U8);
233 } else {
234 WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u));
235 }
224} 236}
225 237
226void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, 238void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
227 Id value) { 239 Id value) {
228 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, 240 if (ctx.profile.support_int8) {
229 sizeof(s8), &StorageDefinitions::S8); 241 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8,
242 sizeof(s8), &StorageDefinitions::S8);
243 } else {
244 WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u));
245 }
230} 246}
231 247
232void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, 248void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
233 Id value) { 249 Id value) {
234 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, 250 if (ctx.profile.support_int16) {
235 sizeof(u16), &StorageDefinitions::U16); 251 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16,
252 sizeof(u16), &StorageDefinitions::U16);
253 } else {
254 WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u));
255 }
236} 256}
237 257
238void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, 258void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
239 Id value) { 259 Id value) {
240 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, 260 if (ctx.profile.support_int16) {
241 sizeof(s16), &StorageDefinitions::S16); 261 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16,
262 sizeof(s16), &StorageDefinitions::S16);
263 } else {
264 WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u));
265 }
242} 266}
243 267
244void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, 268void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 89ebab08e..a27f2f73a 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -480,6 +480,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
480 DefineTextures(program.info, texture_binding, bindings.texture_scaling_index); 480 DefineTextures(program.info, texture_binding, bindings.texture_scaling_index);
481 DefineImages(program.info, image_binding, bindings.image_scaling_index); 481 DefineImages(program.info, image_binding, bindings.image_scaling_index);
482 DefineAttributeMemAccess(program.info); 482 DefineAttributeMemAccess(program.info);
483 DefineWriteStorageCasLoopFunction(program.info);
483 DefineGlobalMemoryFunctions(program.info); 484 DefineGlobalMemoryFunctions(program.info);
484 DefineRescalingInput(program.info); 485 DefineRescalingInput(program.info);
485 DefineRenderArea(program.info); 486 DefineRenderArea(program.info);
@@ -877,6 +878,56 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
877 } 878 }
878} 879}
879 880
881void EmitContext::DefineWriteStorageCasLoopFunction(const Info& info) {
882 if (profile.support_int8 && profile.support_int16) {
883 return;
884 }
885 if (!info.uses_int8 && !info.uses_int16) {
886 return;
887 }
888
889 AddCapability(spv::Capability::VariablePointersStorageBuffer);
890
891 const Id ptr_type{TypePointer(spv::StorageClass::StorageBuffer, U32[1])};
892 const Id func_type{TypeFunction(void_id, ptr_type, U32[1], U32[1], U32[1])};
893 const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
894 const Id pointer{OpFunctionParameter(ptr_type)};
895 const Id value{OpFunctionParameter(U32[1])};
896 const Id bit_offset{OpFunctionParameter(U32[1])};
897 const Id bit_count{OpFunctionParameter(U32[1])};
898
899 AddLabel();
900 const Id scope_device{Const(1u)};
901 const Id ordering_relaxed{u32_zero_value};
902 const Id body_label{OpLabel()};
903 const Id continue_label{OpLabel()};
904 const Id endloop_label{OpLabel()};
905 const Id beginloop_label{OpLabel()};
906 OpBranch(beginloop_label);
907
908 AddLabel(beginloop_label);
909 OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone);
910 OpBranch(body_label);
911
912 AddLabel(body_label);
913 const Id expected_value{OpLoad(U32[1], pointer)};
914 const Id desired_value{OpBitFieldInsert(U32[1], expected_value, value, bit_offset, bit_count)};
915 const Id actual_value{OpAtomicCompareExchange(U32[1], pointer, scope_device, ordering_relaxed,
916 ordering_relaxed, desired_value, expected_value)};
917 const Id store_successful{OpIEqual(U1, expected_value, actual_value)};
918 OpBranchConditional(store_successful, endloop_label, continue_label);
919
920 AddLabel(endloop_label);
921 OpReturn();
922
923 AddLabel(continue_label);
924 OpBranch(beginloop_label);
925
926 OpFunctionEnd();
927
928 write_storage_cas_loop_func = func;
929}
930
880void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { 931void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
881 if (!info.uses_global_memory || !profile.support_int64) { 932 if (!info.uses_global_memory || !profile.support_int64) {
882 return; 933 return;
@@ -1440,7 +1491,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
1440 if (profile.support_vertex_instance_id) { 1491 if (profile.support_vertex_instance_id) {
1441 instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); 1492 instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
1442 if (loads[IR::Attribute::BaseInstance]) { 1493 if (loads[IR::Attribute::BaseInstance]) {
1443 base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); 1494 base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
1444 } 1495 }
1445 } else { 1496 } else {
1446 instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); 1497 instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 56019ad89..40adcb6b6 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -325,6 +325,8 @@ public:
325 Id f32x2_min_cas{}; 325 Id f32x2_min_cas{};
326 Id f32x2_max_cas{}; 326 Id f32x2_max_cas{};
327 327
328 Id write_storage_cas_loop_func{};
329
328 Id load_global_func_u32{}; 330 Id load_global_func_u32{};
329 Id load_global_func_u32x2{}; 331 Id load_global_func_u32x2{};
330 Id load_global_func_u32x4{}; 332 Id load_global_func_u32x4{};
@@ -372,6 +374,7 @@ private:
372 void DefineTextures(const Info& info, u32& binding, u32& scaling_index); 374 void DefineTextures(const Info& info, u32& binding, u32& scaling_index);
373 void DefineImages(const Info& info, u32& binding, u32& scaling_index); 375 void DefineImages(const Info& info, u32& binding, u32& scaling_index);
374 void DefineAttributeMemAccess(const Info& info); 376 void DefineAttributeMemAccess(const Info& info);
377 void DefineWriteStorageCasLoopFunction(const Info& info);
375 void DefineGlobalMemoryFunctions(const Info& info); 378 void DefineGlobalMemoryFunctions(const Info& info);
376 void DefineRescalingInput(const Info& info); 379 void DefineRescalingInput(const Info& info);
377 void DefineRescalingInputPushConstant(); 380 void DefineRescalingInputPushConstant();