diff options
Diffstat (limited to '')
13 files changed, 315 insertions, 11 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 4161783c8..1f286484c 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -124,6 +124,7 @@ add_library(shader_recompiler STATIC | |||
| 124 | frontend/maxwell/translate/impl/select_source_with_predicate.cpp | 124 | frontend/maxwell/translate/impl/select_source_with_predicate.cpp |
| 125 | frontend/maxwell/translate/impl/texture_fetch.cpp | 125 | frontend/maxwell/translate/impl/texture_fetch.cpp |
| 126 | frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | 126 | frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp |
| 127 | frontend/maxwell/translate/impl/texture_gather.cpp | ||
| 127 | frontend/maxwell/translate/impl/vote.cpp | 128 | frontend/maxwell/translate/impl/vote.cpp |
| 128 | frontend/maxwell/translate/impl/warp_shuffle.cpp | 129 | frontend/maxwell/translate/impl/warp_shuffle.cpp |
| 129 | frontend/maxwell/translate/translate.cpp | 130 | frontend/maxwell/translate/translate.cpp |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index a233a4817..4f945b917 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -340,10 +340,14 @@ Id EmitBindlessImageSampleImplicitLod(EmitContext&); | |||
| 340 | Id EmitBindlessImageSampleExplicitLod(EmitContext&); | 340 | Id EmitBindlessImageSampleExplicitLod(EmitContext&); |
| 341 | Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); | 341 | Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); |
| 342 | Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); | 342 | Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); |
| 343 | Id EmitBindlessImageGather(EmitContext&); | ||
| 344 | Id EmitBindlessImageGatherDref(EmitContext&); | ||
| 343 | Id EmitBoundImageSampleImplicitLod(EmitContext&); | 345 | Id EmitBoundImageSampleImplicitLod(EmitContext&); |
| 344 | Id EmitBoundImageSampleExplicitLod(EmitContext&); | 346 | Id EmitBoundImageSampleExplicitLod(EmitContext&); |
| 345 | Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); | 347 | Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); |
| 346 | Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); | 348 | Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); |
| 349 | Id EmitBoundImageGather(EmitContext&); | ||
| 350 | Id EmitBoundImageGatherDref(EmitContext&); | ||
| 347 | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | 351 | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, |
| 348 | Id bias_lc, Id offset); | 352 | Id bias_lc, Id offset); |
| 349 | Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | 353 | Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, |
| @@ -352,6 +356,10 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va | |||
| 352 | Id coords, Id dref, Id bias_lc, Id offset); | 356 | Id coords, Id dref, Id bias_lc, Id offset); |
| 353 | Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | 357 | Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, |
| 354 | Id coords, Id dref, Id lod_lc, Id offset); | 358 | Id coords, Id dref, Id lod_lc, Id offset); |
| 359 | Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, | ||
| 360 | Id offset2); | ||
| 361 | Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 362 | Id offset, Id offset2, Id dref); | ||
| 355 | Id EmitVoteAll(EmitContext& ctx, Id pred); | 363 | Id EmitVoteAll(EmitContext& ctx, Id pred); |
| 356 | Id EmitVoteAny(EmitContext& ctx, Id pred); | 364 | Id EmitVoteAny(EmitContext& ctx, Id pred); |
| 357 | Id EmitVoteEqual(EmitContext& ctx, Id pred); | 365 | Id EmitVoteEqual(EmitContext& ctx, Id pred); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index f75152911..589013773 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -30,6 +30,12 @@ public: | |||
| 30 | } | 30 | } |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset) { | ||
| 34 | if (Sirit::ValidId(offset)) { | ||
| 35 | Add(spv::ImageOperandsMask::Offset, offset); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 33 | void Add(spv::ImageOperandsMask new_mask, Id value) { | 39 | void Add(spv::ImageOperandsMask new_mask, Id value) { |
| 34 | mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | | 40 | mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | |
| 35 | static_cast<unsigned>(new_mask)); | 41 | static_cast<unsigned>(new_mask)); |
| @@ -98,6 +104,14 @@ Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { | |||
| 98 | throw LogicError("Unreachable instruction"); | 104 | throw LogicError("Unreachable instruction"); |
| 99 | } | 105 | } |
| 100 | 106 | ||
| 107 | Id EmitBindlessImageGather(EmitContext&) { | ||
| 108 | throw LogicError("Unreachable instruction"); | ||
| 109 | } | ||
| 110 | |||
| 111 | Id EmitBindlessImageGatherDref(EmitContext&) { | ||
| 112 | throw LogicError("Unreachable instruction"); | ||
| 113 | } | ||
| 114 | |||
| 101 | Id EmitBoundImageSampleImplicitLod(EmitContext&) { | 115 | Id EmitBoundImageSampleImplicitLod(EmitContext&) { |
| 102 | throw LogicError("Unreachable instruction"); | 116 | throw LogicError("Unreachable instruction"); |
| 103 | } | 117 | } |
| @@ -114,6 +128,14 @@ Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) { | |||
| 114 | throw LogicError("Unreachable instruction"); | 128 | throw LogicError("Unreachable instruction"); |
| 115 | } | 129 | } |
| 116 | 130 | ||
| 131 | Id EmitBoundImageGather(EmitContext&) { | ||
| 132 | throw LogicError("Unreachable instruction"); | ||
| 133 | } | ||
| 134 | |||
| 135 | Id EmitBoundImageGatherDref(EmitContext&) { | ||
| 136 | throw LogicError("Unreachable instruction"); | ||
| 137 | } | ||
| 138 | |||
| 117 | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | 139 | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, |
| 118 | Id bias_lc, Id offset) { | 140 | Id bias_lc, Id offset) { |
| 119 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 141 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| @@ -152,4 +174,22 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va | |||
| 152 | Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); | 174 | Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); |
| 153 | } | 175 | } |
| 154 | 176 | ||
| 177 | Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, | ||
| 178 | [[maybe_unused]] Id offset2) { | ||
| 179 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 180 | const ImageOperands operands(ctx, offset); | ||
| 181 | return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, | ||
| 182 | ctx.F32[4], Texture(ctx, index), coords, | ||
| 183 | ctx.Constant(ctx.U32[1], info.gather_component.Value()), operands.Mask(), | ||
| 184 | operands.Span()); | ||
| 185 | } | ||
| 186 | |||
| 187 | Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 188 | Id offset, [[maybe_unused]] Id offset2, Id dref) { | ||
| 189 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 190 | const ImageOperands operands(ctx, offset); | ||
| 191 | return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, | ||
| 192 | ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); | ||
| 193 | } | ||
| 194 | |||
| 155 | } // namespace Shader::Backend::SPIRV | 195 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 418b7f5ac..b365a8a6e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -1474,6 +1474,19 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coor | |||
| 1474 | return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset); | 1474 | return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset); |
| 1475 | } | 1475 | } |
| 1476 | 1476 | ||
| 1477 | Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, | ||
| 1478 | const Value& offset2, TextureInstInfo info) { | ||
| 1479 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather}; | ||
| 1480 | return Inst(op, Flags{info}, handle, coords, offset, offset2); | ||
| 1481 | } | ||
| 1482 | |||
| 1483 | Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, | ||
| 1484 | const Value& offset2, const F32& dref, TextureInstInfo info) { | ||
| 1485 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref | ||
| 1486 | : Opcode::BindlessImageGatherDref}; | ||
| 1487 | return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); | ||
| 1488 | } | ||
| 1489 | |||
| 1477 | U1 IREmitter::VoteAll(const U1& value) { | 1490 | U1 IREmitter::VoteAll(const U1& value) { |
| 1478 | return Inst<U1>(Opcode::VoteAll, value); | 1491 | return Inst<U1>(Opcode::VoteAll, value); |
| 1479 | } | 1492 | } |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 64738735e..04b43197f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -240,6 +240,12 @@ public: | |||
| 240 | const Value& offset, const F32& lod_clamp, | 240 | const Value& offset, const F32& lod_clamp, |
| 241 | TextureInstInfo info); | 241 | TextureInstInfo info); |
| 242 | 242 | ||
| 243 | [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, | ||
| 244 | const Value& offset2, TextureInstInfo info); | ||
| 245 | |||
| 246 | [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, | ||
| 247 | const Value& offset2, const F32& dref, TextureInstInfo info); | ||
| 248 | |||
| 243 | [[nodiscard]] U1 VoteAll(const U1& value); | 249 | [[nodiscard]] U1 VoteAll(const U1& value); |
| 244 | [[nodiscard]] U1 VoteAny(const U1& value); | 250 | [[nodiscard]] U1 VoteAny(const U1& value); |
| 245 | [[nodiscard]] U1 VoteEqual(const U1& value); | 251 | [[nodiscard]] U1 VoteEqual(const U1& value); |
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 308c00153..4f09a4b39 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h | |||
| @@ -38,6 +38,7 @@ union TextureInstInfo { | |||
| 38 | BitField<8, 1, u32> has_bias; | 38 | BitField<8, 1, u32> has_bias; |
| 39 | BitField<9, 1, u32> has_lod_clamp; | 39 | BitField<9, 1, u32> has_lod_clamp; |
| 40 | BitField<10, 1, u32> relaxed_precision; | 40 | BitField<10, 1, u32> relaxed_precision; |
| 41 | BitField<11, 2, u32> gather_component; | ||
| 41 | }; | 42 | }; |
| 42 | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); | 43 | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); |
| 43 | 44 | ||
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index a2479c46a..60a0bc980 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -353,16 +353,22 @@ OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, | |||
| 353 | OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | 353 | OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) |
| 354 | OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | 354 | OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) |
| 355 | OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | 355 | OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) |
| 356 | OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 357 | OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) | ||
| 356 | 358 | ||
| 357 | OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | 359 | OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) |
| 358 | OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | 360 | OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) |
| 359 | OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | 361 | OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) |
| 360 | OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | 362 | OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) |
| 363 | OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 364 | OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) | ||
| 361 | 365 | ||
| 362 | OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | 366 | OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) |
| 363 | OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | 367 | OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) |
| 364 | OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | 368 | OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) |
| 365 | OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | 369 | OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) |
| 370 | OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 371 | OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) | ||
| 366 | 372 | ||
| 367 | // Warp operations | 373 | // Warp operations |
| 368 | OPCODE(VoteAll, U1, U1, ) | 374 | OPCODE(VoteAll, U1, U1, ) |
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index c6cd2a79b..d668dc1aa 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc | |||
| @@ -254,8 +254,8 @@ INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") | |||
| 254 | INST(TEXS, "TEXS", "1101 -00- ---- ----") | 254 | INST(TEXS, "TEXS", "1101 -00- ---- ----") |
| 255 | INST(TLD, "TLD", "1101 1100 --11 1---") | 255 | INST(TLD, "TLD", "1101 1100 --11 1---") |
| 256 | INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") | 256 | INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") |
| 257 | INST(TLD4, "TLD4", "1100 10-- --11 1---") | 257 | INST(TLD4, "TLD4", "1100 10-- ---- ----") |
| 258 | INST(TLD4_b, "TLD4 (b)", "1101 1110 1111 1---") | 258 | INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") |
| 259 | INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") | 259 | INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") |
| 260 | INST(TLDS, "TLDS", "1101 -01- ---- ----") | 260 | INST(TLDS, "TLDS", "1101 -01- ---- ----") |
| 261 | INST(TMML, "TMML", "1101 1111 0101 1---") | 261 | INST(TMML, "TMML", "1101 1111 0101 1---") |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 3ccd7b925..e59c3326e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -349,14 +349,6 @@ void TranslatorVisitor::TLD_b(u64) { | |||
| 349 | ThrowNotImplemented(Opcode::TLD_b); | 349 | ThrowNotImplemented(Opcode::TLD_b); |
| 350 | } | 350 | } |
| 351 | 351 | ||
| 352 | void TranslatorVisitor::TLD4(u64) { | ||
| 353 | ThrowNotImplemented(Opcode::TLD4); | ||
| 354 | } | ||
| 355 | |||
| 356 | void TranslatorVisitor::TLD4_b(u64) { | ||
| 357 | ThrowNotImplemented(Opcode::TLD4_b); | ||
| 358 | } | ||
| 359 | |||
| 360 | void TranslatorVisitor::TLD4S(u64) { | 352 | void TranslatorVisitor::TLD4S(u64) { |
| 361 | ThrowNotImplemented(Opcode::TLD4S); | 353 | ThrowNotImplemented(Opcode::TLD4S); |
| 362 | } | 354 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp new file mode 100644 index 000000000..d64865876 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp | |||
| @@ -0,0 +1,209 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | enum class OffsetType : u64 { | ||
| 27 | None = 0, | ||
| 28 | AOFFI, | ||
| 29 | PTP, | ||
| 30 | Invalid, | ||
| 31 | }; | ||
| 32 | |||
| 33 | enum class ComponentType : u64 { | ||
| 34 | R = 0, | ||
| 35 | G = 1, | ||
| 36 | B = 2, | ||
| 37 | A = 3, | ||
| 38 | }; | ||
| 39 | |||
| 40 | Shader::TextureType GetType(TextureType type, bool dc) { | ||
| 41 | switch (type) { | ||
| 42 | case TextureType::_1D: | ||
| 43 | return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; | ||
| 44 | case TextureType::ARRAY_1D: | ||
| 45 | return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; | ||
| 46 | case TextureType::_2D: | ||
| 47 | return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; | ||
| 48 | case TextureType::ARRAY_2D: | ||
| 49 | return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; | ||
| 50 | case TextureType::_3D: | ||
| 51 | return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; | ||
| 52 | case TextureType::ARRAY_3D: | ||
| 53 | throw NotImplementedException("3D array texture type"); | ||
| 54 | case TextureType::CUBE: | ||
| 55 | return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; | ||
| 56 | case TextureType::ARRAY_CUBE: | ||
| 57 | return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; | ||
| 58 | } | ||
| 59 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 60 | } | ||
| 61 | |||
| 62 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 63 | const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; | ||
| 64 | switch (type) { | ||
| 65 | case TextureType::_1D: | ||
| 66 | return v.F(reg); | ||
| 67 | case TextureType::ARRAY_1D: | ||
| 68 | return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); | ||
| 69 | case TextureType::_2D: | ||
| 70 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 71 | case TextureType::ARRAY_2D: | ||
| 72 | return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); | ||
| 73 | case TextureType::_3D: | ||
| 74 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 75 | case TextureType::ARRAY_3D: | ||
| 76 | throw NotImplementedException("3D array texture type"); | ||
| 77 | case TextureType::CUBE: | ||
| 78 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 79 | case TextureType::ARRAY_CUBE: | ||
| 80 | return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); | ||
| 81 | } | ||
| 82 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 83 | } | ||
| 84 | |||
| 85 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 86 | const IR::U32 value{v.X(reg++)}; | ||
| 87 | switch (type) { | ||
| 88 | case TextureType::_1D: | ||
| 89 | case TextureType::ARRAY_1D: | ||
| 90 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true); | ||
| 91 | case TextureType::_2D: | ||
| 92 | case TextureType::ARRAY_2D: | ||
| 93 | return v.ir.CompositeConstruct( | ||
| 94 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 95 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); | ||
| 96 | case TextureType::_3D: | ||
| 97 | case TextureType::ARRAY_3D: | ||
| 98 | return v.ir.CompositeConstruct( | ||
| 99 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 100 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), | ||
| 101 | v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true)); | ||
| 102 | case TextureType::CUBE: | ||
| 103 | case TextureType::ARRAY_CUBE: | ||
| 104 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 105 | } | ||
| 106 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 107 | } | ||
| 108 | |||
| 109 | std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { | ||
| 110 | const IR::U32 value1{v.X(reg++)}; | ||
| 111 | const IR::U32 value2{v.X(reg++)}; | ||
| 112 | const auto getVector = ([&v](const IR::U32& value) { | ||
| 113 | return v.ir.CompositeConstruct( | ||
| 114 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), | ||
| 116 | v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true), | ||
| 117 | v.ir.BitFieldExtract(value, v.ir.Imm32(24), v.ir.Imm32(6), true)); | ||
| 118 | }); | ||
| 119 | return {getVector(value1), getVector(value2)}; | ||
| 120 | } | ||
| 121 | |||
| 122 | void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, | ||
| 123 | bool is_bindless) { | ||
| 124 | union { | ||
| 125 | u64 raw; | ||
| 126 | BitField<35, 1, u64> ndv; | ||
| 127 | BitField<49, 1, u64> nodep; | ||
| 128 | BitField<50, 1, u64> dc; | ||
| 129 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 130 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 131 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 132 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 133 | BitField<28, 3, TextureType> type; | ||
| 134 | BitField<31, 4, u64> mask; | ||
| 135 | BitField<36, 13, u64> cbuf_offset; | ||
| 136 | } const tld4{insn}; | ||
| 137 | |||
| 138 | const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)}; | ||
| 139 | |||
| 140 | IR::Reg meta_reg{tld4.meta_reg}; | ||
| 141 | IR::Value handle; | ||
| 142 | IR::Value offset; | ||
| 143 | IR::Value offset2; | ||
| 144 | IR::F32 dref; | ||
| 145 | if (!is_bindless) { | ||
| 146 | handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4)); | ||
| 147 | } else { | ||
| 148 | handle = v.X(meta_reg++); | ||
| 149 | } | ||
| 150 | switch (offset_type) { | ||
| 151 | case OffsetType::None: | ||
| 152 | break; | ||
| 153 | case OffsetType::AOFFI: { | ||
| 154 | offset = MakeOffset(v, meta_reg, tld4.type); | ||
| 155 | break; | ||
| 156 | } | ||
| 157 | case OffsetType::PTP: { | ||
| 158 | std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); | ||
| 159 | break; | ||
| 160 | } | ||
| 161 | default: | ||
| 162 | throw NotImplementedException("Invalid offset type {}", offset_type); | ||
| 163 | } | ||
| 164 | if (tld4.dc != 0) { | ||
| 165 | dref = v.F(meta_reg++); | ||
| 166 | } | ||
| 167 | IR::TextureInstInfo info{}; | ||
| 168 | info.type.Assign(GetType(tld4.type, tld4.dc != 0)); | ||
| 169 | info.gather_component.Assign(static_cast<u32>(component_type)); | ||
| 170 | const IR::Value sample{[&]() -> IR::Value { | ||
| 171 | if (tld4.dc == 0) { | ||
| 172 | return v.ir.ImageGather(handle, coords, offset, offset2, info); | ||
| 173 | } | ||
| 174 | return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info); | ||
| 175 | }()}; | ||
| 176 | |||
| 177 | IR::Reg dest_reg{tld4.dest_reg}; | ||
| 178 | for (size_t element = 0; element < 4; ++element) { | ||
| 179 | if (((tld4.mask >> element) & 1) == 0) { | ||
| 180 | continue; | ||
| 181 | } | ||
| 182 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 183 | ++dest_reg; | ||
| 184 | } | ||
| 185 | if (tld4.sparse_pred != IR::Pred::PT) { | ||
| 186 | v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 187 | } | ||
| 188 | } | ||
| 189 | } // Anonymous namespace | ||
| 190 | |||
| 191 | void TranslatorVisitor::TLD4(u64 insn) { | ||
| 192 | union { | ||
| 193 | u64 raw; | ||
| 194 | BitField<56, 2, ComponentType> component; | ||
| 195 | BitField<54, 2, OffsetType> offset; | ||
| 196 | } const tld4{insn}; | ||
| 197 | Impl(*this, insn, tld4.component, tld4.offset, false); | ||
| 198 | } | ||
| 199 | |||
| 200 | void TranslatorVisitor::TLD4_b(u64 insn) { | ||
| 201 | union { | ||
| 202 | u64 raw; | ||
| 203 | BitField<38, 2, ComponentType> component; | ||
| 204 | BitField<36, 2, OffsetType> offset; | ||
| 205 | } const tld4{insn}; | ||
| 206 | Impl(*this, insn, tld4.component, tld4.offset, true); | ||
| 207 | } | ||
| 208 | |||
| 209 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 61cc314c7..6fe06fda8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -352,14 +352,20 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 352 | case IR::Opcode::BindlessImageSampleExplicitLod: | 352 | case IR::Opcode::BindlessImageSampleExplicitLod: |
| 353 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | 353 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: |
| 354 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | 354 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: |
| 355 | case IR::Opcode::BindlessImageGather: | ||
| 356 | case IR::Opcode::BindlessImageGatherDref: | ||
| 355 | case IR::Opcode::BoundImageSampleImplicitLod: | 357 | case IR::Opcode::BoundImageSampleImplicitLod: |
| 356 | case IR::Opcode::BoundImageSampleExplicitLod: | 358 | case IR::Opcode::BoundImageSampleExplicitLod: |
| 357 | case IR::Opcode::BoundImageSampleDrefImplicitLod: | 359 | case IR::Opcode::BoundImageSampleDrefImplicitLod: |
| 358 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | 360 | case IR::Opcode::BoundImageSampleDrefExplicitLod: |
| 361 | case IR::Opcode::BoundImageGather: | ||
| 362 | case IR::Opcode::BoundImageGatherDref: | ||
| 359 | case IR::Opcode::ImageSampleImplicitLod: | 363 | case IR::Opcode::ImageSampleImplicitLod: |
| 360 | case IR::Opcode::ImageSampleExplicitLod: | 364 | case IR::Opcode::ImageSampleExplicitLod: |
| 361 | case IR::Opcode::ImageSampleDrefImplicitLod: | 365 | case IR::Opcode::ImageSampleDrefImplicitLod: |
| 362 | case IR::Opcode::ImageSampleDrefExplicitLod: { | 366 | case IR::Opcode::ImageSampleDrefExplicitLod: |
| 367 | case IR::Opcode::ImageGather: | ||
| 368 | case IR::Opcode::ImageGatherDref: { | ||
| 363 | const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; | 369 | const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; |
| 364 | info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || | 370 | info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || |
| 365 | type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; | 371 | type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; |
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 3dab424f6..28060dccf 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -403,6 +403,18 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | |||
| 403 | return (base >> shift) & ((1U << count) - 1); | 403 | return (base >> shift) & ((1U << count) - 1); |
| 404 | }); | 404 | }); |
| 405 | return; | 405 | return; |
| 406 | case IR::Opcode::BitFieldSExtract: | ||
| 407 | FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) { | ||
| 408 | const size_t back_shift = static_cast<size_t>(shift) + static_cast<size_t>(count); | ||
| 409 | if (back_shift > Common::BitSize<s32>()) { | ||
| 410 | throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, | ||
| 411 | base, shift, count); | ||
| 412 | } | ||
| 413 | const size_t left_shift = Common::BitSize<s32>() - back_shift; | ||
| 414 | return static_cast<u32>(static_cast<s32>(base << left_shift) >> | ||
| 415 | static_cast<size_t>(Common::BitSize<s32>() - count)); | ||
| 416 | }); | ||
| 417 | return; | ||
| 406 | case IR::Opcode::BranchConditional: | 418 | case IR::Opcode::BranchConditional: |
| 407 | return FoldBranchConditional(inst); | 419 | return FoldBranchConditional(inst); |
| 408 | default: | 420 | default: |
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 2c8164b8a..454ac3e71 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp | |||
| @@ -45,6 +45,12 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { | |||
| 45 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | 45 | case IR::Opcode::BoundImageSampleDrefExplicitLod: |
| 46 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | 46 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: |
| 47 | return IR::Opcode::ImageSampleDrefExplicitLod; | 47 | return IR::Opcode::ImageSampleDrefExplicitLod; |
| 48 | case IR::Opcode::BindlessImageGather: | ||
| 49 | case IR::Opcode::BoundImageGather: | ||
| 50 | return IR::Opcode::ImageGather; | ||
| 51 | case IR::Opcode::BindlessImageGatherDref: | ||
| 52 | case IR::Opcode::BoundImageGatherDref: | ||
| 53 | return IR::Opcode::ImageGatherDref; | ||
| 48 | default: | 54 | default: |
| 49 | return IR::Opcode::Void; | 55 | return IR::Opcode::Void; |
| 50 | } | 56 | } |
| @@ -56,11 +62,15 @@ bool IsBindless(const IR::Inst& inst) { | |||
| 56 | case IR::Opcode::BindlessImageSampleExplicitLod: | 62 | case IR::Opcode::BindlessImageSampleExplicitLod: |
| 57 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | 63 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: |
| 58 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | 64 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: |
| 65 | case IR::Opcode::BindlessImageGather: | ||
| 66 | case IR::Opcode::BindlessImageGatherDref: | ||
| 59 | return true; | 67 | return true; |
| 60 | case IR::Opcode::BoundImageSampleImplicitLod: | 68 | case IR::Opcode::BoundImageSampleImplicitLod: |
| 61 | case IR::Opcode::BoundImageSampleExplicitLod: | 69 | case IR::Opcode::BoundImageSampleExplicitLod: |
| 62 | case IR::Opcode::BoundImageSampleDrefImplicitLod: | 70 | case IR::Opcode::BoundImageSampleDrefImplicitLod: |
| 63 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | 71 | case IR::Opcode::BoundImageSampleDrefExplicitLod: |
| 72 | case IR::Opcode::BoundImageGather: | ||
| 73 | case IR::Opcode::BoundImageGatherDref: | ||
| 64 | return false; | 74 | return false; |
| 65 | default: | 75 | default: |
| 66 | throw InvalidArgument("Invalid opcode {}", inst.Opcode()); | 76 | throw InvalidArgument("Invalid opcode {}", inst.Opcode()); |