Diffstat (limited to 'src/shader_recompiler/backend')
8 files changed, 151 insertions, 41 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index 6e940bd5a..ad39f44c3 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -449,7 +449,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
 }
 
 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                    std::string_view coords, std::string_view offset, std::string_view lod,
+                    std::string_view coords, const IR::Value& offset, std::string_view lod,
                     std::string_view ms) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     if (info.has_bias) {
@@ -470,9 +470,9 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     const auto int_coords{CoordsCastToInt(coords, info)};
     if (!ms.empty()) {
         ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms);
-    } else if (!offset.empty()) {
+    } else if (!offset.IsEmpty()) {
         ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod,
-                CoordsCastToInt(offset, info));
+                GetOffsetVec(ctx, offset));
     } else {
         if (info.type == TextureType::Buffer) {
             ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords);
@@ -485,10 +485,10 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     if (!ms.empty()) {
         throw NotImplementedException("EmitImageFetch Sparse MSAA samples");
     }
-    if (!offset.empty()) {
+    if (!offset.IsEmpty()) {
         ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
-                  *sparse_inst, texture, CastToIntVec(coords, info), lod,
-                  CastToIntVec(offset, info), texel);
+                  *sparse_inst, texture, CastToIntVec(coords, info), lod, GetOffsetVec(ctx, offset),
+                  texel);
     } else {
         ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));",
                   *sparse_inst, texture, CastToIntVec(coords, info), lod, texel);
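Note: GLSL requires the offset argument of texelFetchOffset (and sparseTexelFetchOffsetARB) to be a constant expression, which is presumably why the offset now arrives as const IR::Value& instead of a pre-formatted string: GetOffsetVec (defined elsewhere in this file, not shown in this diff) can then render the IR immediate as a literal. A minimal illustrative sketch of that kind of formatting; the helper name below is hypothetical, not the real implementation:

#include <string>
#include <fmt/format.h>

// Hypothetical: format a known-constant 2D texel offset as the ivec literal
// GLSL demands, e.g. FormatConstOffset2D(1, -1) yields "ivec2(1,-1)".
std::string FormatConstOffset2D(int x, int y) {
    return fmt::format("ivec2({},{})", x, y);
}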
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index 8d0a65047..acebaa785 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -651,7 +651,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
                          std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
                          std::string_view dref);
 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                    std::string_view coords, std::string_view offset, std::string_view lod,
+                    std::string_view coords, const IR::Value& offset, std::string_view lod,
                     std::string_view ms);
 void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                               std::string_view lod, const IR::Value& skip_mips);
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index b2ceeefc4..c5ac7b8f2 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -608,8 +608,8 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
     const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)};
     const auto aligned_addr{fmt::format("uvec2({},{})", aligned_low_addr, addr_xy[1])};
     const auto addr_pack{fmt::format("packUint2x32({})", aligned_addr)};
-    const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
-    func += addr_statment;
+    const auto addr_statement{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
+    func += addr_statement;
 
     const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])};
     const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 800754554..64a4e0e55 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -12,6 +12,11 @@ namespace Shader::Backend::SPIRV {
 namespace {
 class ImageOperands {
 public:
+    [[maybe_unused]] static constexpr bool ImageSampleOffsetAllowed = false;
+    [[maybe_unused]] static constexpr bool ImageGatherOffsetAllowed = true;
+    [[maybe_unused]] static constexpr bool ImageFetchOffsetAllowed = false;
+    [[maybe_unused]] static constexpr bool ImageGradientOffsetAllowed = false;
+
     explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp,
                            Id lod, const IR::Value& offset) {
         if (has_bias) {
@@ -22,7 +27,7 @@ public:
             const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
             Add(spv::ImageOperandsMask::Lod, lod_value);
         }
-        AddOffset(ctx, offset);
+        AddOffset(ctx, offset, ImageSampleOffsetAllowed);
         if (has_lod_clamp) {
             const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod};
             Add(spv::ImageOperandsMask::MinLod, lod_clamp);
@@ -55,20 +60,17 @@ public:
         Add(spv::ImageOperandsMask::ConstOffsets, offsets);
     }
 
-    explicit ImageOperands(Id offset, Id lod, Id ms) {
+    explicit ImageOperands(Id lod, Id ms) {
         if (Sirit::ValidId(lod)) {
             Add(spv::ImageOperandsMask::Lod, lod);
         }
-        if (Sirit::ValidId(offset)) {
-            Add(spv::ImageOperandsMask::Offset, offset);
-        }
         if (Sirit::ValidId(ms)) {
             Add(spv::ImageOperandsMask::Sample, ms);
         }
     }
 
     explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives,
-                           u32 num_derivatives, Id offset, Id lod_clamp) {
+                           u32 num_derivatives, const IR::Value& offset, Id lod_clamp) {
         if (!Sirit::ValidId(derivatives)) {
             throw LogicError("Derivatives must be present");
         }
@@ -83,16 +85,14 @@ public:
         const Id derivatives_Y{ctx.OpCompositeConstruct(
             ctx.F32[num_derivatives], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
         Add(spv::ImageOperandsMask::Grad, derivatives_X, derivatives_Y);
-        if (Sirit::ValidId(offset)) {
-            Add(spv::ImageOperandsMask::Offset, offset);
-        }
+        AddOffset(ctx, offset, ImageGradientOffsetAllowed);
         if (has_lod_clamp) {
             Add(spv::ImageOperandsMask::MinLod, lod_clamp);
         }
     }
 
     explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives_1, Id derivatives_2,
-                           Id offset, Id lod_clamp) {
+                           const IR::Value& offset, Id lod_clamp) {
         if (!Sirit::ValidId(derivatives_1) || !Sirit::ValidId(derivatives_2)) {
             throw LogicError("Derivatives must be present");
         }
@@ -111,9 +111,7 @@ public:
         const Id derivatives_id2{ctx.OpCompositeConstruct(
             ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})};
         Add(spv::ImageOperandsMask::Grad, derivatives_id1, derivatives_id2);
-        if (Sirit::ValidId(offset)) {
-            Add(spv::ImageOperandsMask::Offset, offset);
-        }
+        AddOffset(ctx, offset, ImageGradientOffsetAllowed);
         if (has_lod_clamp) {
             Add(spv::ImageOperandsMask::MinLod, lod_clamp);
         }
@@ -132,7 +130,7 @@ public:
     }
 
 private:
-    void AddOffset(EmitContext& ctx, const IR::Value& offset) {
+    void AddOffset(EmitContext& ctx, const IR::Value& offset, bool runtime_offset_allowed) {
         if (offset.IsEmpty()) {
             return;
         }
@@ -165,7 +163,9 @@ private:
             break;
         }
         }
-        Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
+        if (runtime_offset_allowed) {
+            Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
+        }
     }
 
     void Add(spv::ImageOperandsMask new_mask, Id value) {
@@ -311,6 +311,37 @@ Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info,
         return coords;
     }
 }
+
+void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, Id& coords,
+                            Id offset) {
+    if (!Sirit::ValidId(offset)) {
+        return;
+    }
+
+    Id result_type{};
+    switch (info.type) {
+    case TextureType::Buffer:
+    case TextureType::Color1D:
+    case TextureType::ColorArray1D: {
+        result_type = ctx.U32[1];
+        break;
+    }
+    case TextureType::Color2D:
+    case TextureType::Color2DRect:
+    case TextureType::ColorArray2D: {
+        result_type = ctx.U32[2];
+        break;
+    }
+    case TextureType::Color3D: {
+        result_type = ctx.U32[3];
+        break;
+    }
+    case TextureType::ColorCube:
+    case TextureType::ColorArrayCube:
+        return;
+    }
+    coords = ctx.OpIAdd(result_type, coords, offset);
+}
 } // Anonymous namespace
 
 Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
@@ -496,6 +527,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
 Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
                   Id lod, Id ms) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
+    AddOffsetToCoordinates(ctx, info, coords, offset);
     if (info.type == TextureType::Buffer) {
         lod = Id{};
     }
@@ -503,7 +535,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
         // This image is multisampled, lod must be implicit
         lod = Id{};
     }
-    const ImageOperands operands(offset, lod, ms);
+    const ImageOperands operands(lod, ms);
     return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
                 TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
 }
@@ -548,13 +580,13 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
 }
 
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                     Id derivatives, Id offset, Id lod_clamp) {
+                     Id derivatives, const IR::Value& offset, Id lod_clamp) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
-    const auto operands =
-        info.num_derivatives == 3
-            ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, offset, {}, lod_clamp)
-            : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset,
-                            lod_clamp);
+    const auto operands = info.num_derivatives == 3
+                              ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
+                                              ctx.Def(offset), {}, lod_clamp)
+                              : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
+                                              info.num_derivatives, offset, lod_clamp);
     return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
                 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
                 Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
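Note: with this change, EmitImageFetch no longer passes the offset as an image operand at all. AddOffsetToCoordinates folds it into the integer texel coordinates with an OpIAdd, and the slimmed-down ImageOperands(lod, ms) constructor only carries Lod/Sample. That is equivalent for fetches, since OpImageFetch addresses texels with absolute unnormalized integer coordinates, and it sidesteps the restriction (suggested by the new *OffsetAllowed flags) that a runtime Offset image operand is only valid on gather operations. A minimal sketch of the fold for the 2D case, using the same sirit-style context API as this file; the helper name is illustrative:

// One-case version of what AddOffsetToCoordinates does per texture type:
// coords and offset are integer ids in texel space, so plain integer addition
// selects the same texel a runtime Offset operand would.
Id FoldOffsetIntoCoords2D(EmitContext& ctx, Id coords, Id offset) {
    return ctx.OpIAdd(ctx.U32[2], coords, offset);
}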
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 7d34575c8..5c01b1012 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -543,7 +543,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
                             const IR::Value& skip_mips);
 Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                     Id derivatives, Id offset, Id lod_clamp);
+                     Id derivatives, const IR::Value& offset, Id lod_clamp);
 Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
 Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
index 8693801c7..bdcbccfde 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -65,6 +65,14 @@ void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value&
     WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32),
                  &StorageDefinitions::U32, index_offset);
 }
+
+void WriteStorageByCasLoop(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value, Id bit_offset, Id bit_count) {
+    const Id pointer{StoragePointer(ctx, binding, offset, ctx.storage_types.U32, sizeof(u32),
+                                    &StorageDefinitions::U32)};
+    ctx.OpFunctionCall(ctx.TypeVoid(), ctx.write_storage_cas_loop_func, pointer, value, bit_offset,
+                       bit_count);
+}
 } // Anonymous namespace
 
 void EmitLoadGlobalU8(EmitContext&) {
@@ -219,26 +227,42 @@ Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Valu
 
 void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         Id value) {
-    WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8,
-                 sizeof(u8), &StorageDefinitions::U8);
+    if (ctx.profile.support_int8) {
+        WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8,
+                     sizeof(u8), &StorageDefinitions::U8);
+    } else {
+        WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u));
+    }
 }
 
 void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         Id value) {
-    WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8,
-                 sizeof(s8), &StorageDefinitions::S8);
+    if (ctx.profile.support_int8) {
+        WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8,
+                     sizeof(s8), &StorageDefinitions::S8);
+    } else {
+        WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u));
+    }
 }
 
 void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
-    WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16,
-                 sizeof(u16), &StorageDefinitions::U16);
+    if (ctx.profile.support_int16) {
+        WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16,
+                     sizeof(u16), &StorageDefinitions::U16);
+    } else {
+        WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u));
+    }
 }
 
 void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
-    WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16,
-                 sizeof(s16), &StorageDefinitions::S16);
+    if (ctx.profile.support_int16) {
+        WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16,
+                     sizeof(s16), &StorageDefinitions::S16);
+    } else {
+        WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u));
+    }
 }
 
 void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
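Note: ctx.BitOffset8 and ctx.BitOffset16 are not part of this diff; from their use here they presumably compute, as SPIR-V ops, where the byte or halfword sits inside its aligned 32-bit word. A hedged scalar sketch of that assumed math (function names and formulas are illustrative, not the real helpers):

#include <cstdint>

// Assumed bit-position math for the CAS fallback: a byte at storage offset o
// occupies bits [(o % 4) * 8, +8) of its 32-bit word; a halfword occupies
// bits [((o % 4) / 2) * 16, +16).
uint32_t BitOffset8(uint32_t byte_offset) {
    return (byte_offset % 4u) * 8u; // 0, 8, 16, or 24
}
uint32_t BitOffset16(uint32_t byte_offset) {
    return ((byte_offset % 4u) / 2u) * 16u; // 0 or 16
}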
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 89ebab08e..a27f2f73a 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -480,6 +480,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
     DefineTextures(program.info, texture_binding, bindings.texture_scaling_index);
     DefineImages(program.info, image_binding, bindings.image_scaling_index);
     DefineAttributeMemAccess(program.info);
+    DefineWriteStorageCasLoopFunction(program.info);
     DefineGlobalMemoryFunctions(program.info);
     DefineRescalingInput(program.info);
     DefineRenderArea(program.info);
@@ -877,6 +878,56 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
     }
 }
 
+void EmitContext::DefineWriteStorageCasLoopFunction(const Info& info) {
+    if (profile.support_int8 && profile.support_int16) {
+        return;
+    }
+    if (!info.uses_int8 && !info.uses_int16) {
+        return;
+    }
+
+    AddCapability(spv::Capability::VariablePointersStorageBuffer);
+
+    const Id ptr_type{TypePointer(spv::StorageClass::StorageBuffer, U32[1])};
+    const Id func_type{TypeFunction(void_id, ptr_type, U32[1], U32[1], U32[1])};
+    const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
+    const Id pointer{OpFunctionParameter(ptr_type)};
+    const Id value{OpFunctionParameter(U32[1])};
+    const Id bit_offset{OpFunctionParameter(U32[1])};
+    const Id bit_count{OpFunctionParameter(U32[1])};
+
+    AddLabel();
+    const Id scope_device{Const(1u)};
+    const Id ordering_relaxed{u32_zero_value};
+    const Id body_label{OpLabel()};
+    const Id continue_label{OpLabel()};
+    const Id endloop_label{OpLabel()};
+    const Id beginloop_label{OpLabel()};
+    OpBranch(beginloop_label);
+
+    AddLabel(beginloop_label);
+    OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone);
+    OpBranch(body_label);
+
+    AddLabel(body_label);
+    const Id expected_value{OpLoad(U32[1], pointer)};
+    const Id desired_value{OpBitFieldInsert(U32[1], expected_value, value, bit_offset, bit_count)};
+    const Id actual_value{OpAtomicCompareExchange(U32[1], pointer, scope_device, ordering_relaxed,
+                                                  ordering_relaxed, desired_value, expected_value)};
+    const Id store_successful{OpIEqual(U1, expected_value, actual_value)};
+    OpBranchConditional(store_successful, endloop_label, continue_label);
+
+    AddLabel(endloop_label);
+    OpReturn();
+
+    AddLabel(continue_label);
+    OpBranch(beginloop_label);
+
+    OpFunctionEnd();
+
+    write_storage_cas_loop_func = func;
+}
+
 void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
     if (!info.uses_global_memory || !profile.support_int64) {
         return;
@@ -1440,7 +1491,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
     if (profile.support_vertex_instance_id) {
         instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
         if (loads[IR::Attribute::BaseInstance]) {
-            base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
+            base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
         }
     } else {
         instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
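Note: the generated helper is a classic compare-and-swap retry loop, expressed as a structured SPIR-V loop (OpLoopMerge in the header block names endloop_label as the merge block and continue_label as the continue target). A C++ sketch of the semantics it implements, assuming relaxed device-scope atomics as in the diff; this mirrors, rather than reproduces, the emitted code:

#include <atomic>
#include <cstdint>

// Emulate an 8- or 16-bit store into a 32-bit word: splice the value in with
// a bitfield insert, then publish it only if the word is unchanged since the
// load; otherwise retry against the freshly observed contents.
void WriteStorageByCasLoop(std::atomic<uint32_t>& word, uint32_t value,
                           uint32_t bit_offset, uint32_t bit_count) {
    for (;;) {
        const uint32_t expected = word.load(std::memory_order_relaxed); // OpLoad
        const uint32_t mask = ((1u << bit_count) - 1u) << bit_offset;
        const uint32_t desired =
            (expected & ~mask) | ((value << bit_offset) & mask); // OpBitFieldInsert
        uint32_t actual = expected;
        // OpAtomicCompareExchange: store desired iff *word still == expected
        if (word.compare_exchange_strong(actual, desired,
                                         std::memory_order_relaxed)) {
            return; // store_successful -> endloop
        }
        // another thread intervened -> continue block, loop again
    }
}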
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 56019ad89..40adcb6b6 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -325,6 +325,8 @@ public:
     Id f32x2_min_cas{};
     Id f32x2_max_cas{};
 
+    Id write_storage_cas_loop_func{};
+
     Id load_global_func_u32{};
     Id load_global_func_u32x2{};
     Id load_global_func_u32x4{};
@@ -372,6 +374,7 @@ private:
     void DefineTextures(const Info& info, u32& binding, u32& scaling_index);
     void DefineImages(const Info& info, u32& binding, u32& scaling_index);
     void DefineAttributeMemAccess(const Info& info);
+    void DefineWriteStorageCasLoopFunction(const Info& info);
     void DefineGlobalMemoryFunctions(const Info& info);
     void DefineRescalingInput(const Info& info);
     void DefineRescalingInputPushConstant();