diff options
Diffstat (limited to 'src/shader_recompiler/frontend')
| -rw-r--r-- | src/shader_recompiler/frontend/ir/attribute.cpp | 6 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/attribute.h | 5 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/ir_emitter.cpp | 22 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/ir_emitter.h | 10 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/opcodes.h | 1 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/opcodes.inc | 7 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/type.h | 31 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/value.cpp | 3 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/value.h | 14 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp | 8 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate_program.cpp | 118 |
11 files changed, 135 insertions, 90 deletions
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 7d3d882e4..1bf9db935 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp | |||
| @@ -446,6 +446,12 @@ std::string NameOf(Attribute attribute) { | |||
| 446 | return "ViewportMask"; | 446 | return "ViewportMask"; |
| 447 | case Attribute::FrontFace: | 447 | case Attribute::FrontFace: |
| 448 | return "FrontFace"; | 448 | return "FrontFace"; |
| 449 | case Attribute::BaseInstance: | ||
| 450 | return "BaseInstance"; | ||
| 451 | case Attribute::BaseVertex: | ||
| 452 | return "BaseVertex"; | ||
| 453 | case Attribute::DrawID: | ||
| 454 | return "DrawID"; | ||
| 449 | } | 455 | } |
| 450 | return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); | 456 | return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); |
| 451 | } | 457 | } |
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 6ee3947b1..5f039b6f6 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h | |||
| @@ -219,6 +219,11 @@ enum class Attribute : u64 { | |||
| 219 | FixedFncTexture9Q = 231, | 219 | FixedFncTexture9Q = 231, |
| 220 | ViewportMask = 232, | 220 | ViewportMask = 232, |
| 221 | FrontFace = 255, | 221 | FrontFace = 255, |
| 222 | |||
| 223 | // Implementation attributes | ||
| 224 | BaseInstance = 256, | ||
| 225 | BaseVertex = 257, | ||
| 226 | DrawID = 258, | ||
| 222 | }; | 227 | }; |
| 223 | 228 | ||
| 224 | constexpr size_t NUM_GENERICS = 32; | 229 | constexpr size_t NUM_GENERICS = 32; |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0cdac0eff..b7caa4246 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { | |||
| 294 | return Inst<F32>(Opcode::GetAttribute, attribute, vertex); | 294 | return Inst<F32>(Opcode::GetAttribute, attribute, vertex); |
| 295 | } | 295 | } |
| 296 | 296 | ||
| 297 | U32 IREmitter::GetAttributeU32(IR::Attribute attribute) { | ||
| 298 | return GetAttributeU32(attribute, Imm32(0)); | ||
| 299 | } | ||
| 300 | |||
| 301 | U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) { | ||
| 302 | return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex); | ||
| 303 | } | ||
| 304 | |||
| 297 | void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { | 305 | void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { |
| 298 | Inst(Opcode::SetAttribute, attribute, value, vertex); | 306 | Inst(Opcode::SetAttribute, attribute, value, vertex); |
| 299 | } | 307 | } |
| @@ -696,11 +704,6 @@ IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) { | |||
| 696 | } | 704 | } |
| 697 | 705 | ||
| 698 | template <> | 706 | template <> |
| 699 | IR::S32 IREmitter::BitCast<IR::S32, IR::F32>(const IR::F32& value) { | ||
| 700 | return Inst<IR::S32>(Opcode::BitCastS32F32, value); | ||
| 701 | } | ||
| 702 | |||
| 703 | template <> | ||
| 704 | IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) { | 707 | IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) { |
| 705 | return Inst<IR::F32>(Opcode::BitCastF32U32, value); | 708 | return Inst<IR::F32>(Opcode::BitCastF32U32, value); |
| 706 | } | 709 | } |
| @@ -1843,15 +1846,16 @@ Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Valu | |||
| 1843 | return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); | 1846 | return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); |
| 1844 | } | 1847 | } |
| 1845 | 1848 | ||
| 1846 | Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { | 1849 | Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, |
| 1850 | const IR::U1& skip_mips) { | ||
| 1847 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions | 1851 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions |
| 1848 | : Opcode::BindlessImageQueryDimensions}; | 1852 | : Opcode::BindlessImageQueryDimensions}; |
| 1849 | return Inst(op, handle, lod); | 1853 | return Inst(op, handle, lod, skip_mips); |
| 1850 | } | 1854 | } |
| 1851 | 1855 | ||
| 1852 | Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, | 1856 | Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, |
| 1853 | TextureInstInfo info) { | 1857 | const IR::U1& skip_mips, TextureInstInfo info) { |
| 1854 | return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod); | 1858 | return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips); |
| 1855 | } | 1859 | } |
| 1856 | 1860 | ||
| 1857 | Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { | 1861 | Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2df992feb..f3c81dbe1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -74,6 +74,8 @@ public: | |||
| 74 | 74 | ||
| 75 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); | 75 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); |
| 76 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); | 76 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); |
| 77 | [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); | ||
| 78 | [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex); | ||
| 77 | void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); | 79 | void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); |
| 78 | 80 | ||
| 79 | [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); | 81 | [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); |
| @@ -318,9 +320,10 @@ public: | |||
| 318 | [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, | 320 | [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, |
| 319 | const F32& dref, const F32& lod, | 321 | const F32& dref, const F32& lod, |
| 320 | const Value& offset, TextureInstInfo info); | 322 | const Value& offset, TextureInstInfo info); |
| 321 | [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); | ||
| 322 | [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, | 323 | [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, |
| 323 | TextureInstInfo info); | 324 | const IR::U1& skip_mips); |
| 325 | [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, | ||
| 326 | const IR::U1& skip_mips, TextureInstInfo info); | ||
| 324 | 327 | ||
| 325 | [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, | 328 | [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, |
| 326 | TextureInstInfo info); | 329 | TextureInstInfo info); |
| @@ -406,7 +409,8 @@ private: | |||
| 406 | } | 409 | } |
| 407 | 410 | ||
| 408 | template <typename T> | 411 | template <typename T> |
| 409 | requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags { | 412 | requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) |
| 413 | struct Flags { | ||
| 410 | Flags() = default; | 414 | Flags() = default; |
| 411 | Flags(T proxy_) : proxy{proxy_} {} | 415 | Flags(T proxy_) : proxy{proxy_} {} |
| 412 | 416 | ||
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index d155afd0f..e300714f3 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h | |||
| @@ -38,7 +38,6 @@ constexpr Type U8{Type::U8}; | |||
| 38 | constexpr Type U16{Type::U16}; | 38 | constexpr Type U16{Type::U16}; |
| 39 | constexpr Type U32{Type::U32}; | 39 | constexpr Type U32{Type::U32}; |
| 40 | constexpr Type U64{Type::U64}; | 40 | constexpr Type U64{Type::U64}; |
| 41 | constexpr Type S32{Type::S32}; | ||
| 42 | constexpr Type F16{Type::F16}; | 41 | constexpr Type F16{Type::F16}; |
| 43 | constexpr Type F32{Type::F32}; | 42 | constexpr Type F32{Type::F32}; |
| 44 | constexpr Type F64{Type::F64}; | 43 | constexpr Type F64{Type::F64}; |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 1fe3749cc..4447d67b0 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -175,7 +175,6 @@ OPCODE(SelectF64, F64, U1, | |||
| 175 | OPCODE(BitCastU16F16, U16, F16, ) | 175 | OPCODE(BitCastU16F16, U16, F16, ) |
| 176 | OPCODE(BitCastU32F32, U32, F32, ) | 176 | OPCODE(BitCastU32F32, U32, F32, ) |
| 177 | OPCODE(BitCastU64F64, U64, F64, ) | 177 | OPCODE(BitCastU64F64, U64, F64, ) |
| 178 | OPCODE(BitCastS32F32, S32, F32, ) | ||
| 179 | OPCODE(BitCastF16U16, F16, U16, ) | 178 | OPCODE(BitCastF16U16, F16, U16, ) |
| 180 | OPCODE(BitCastF32U32, F32, U32, ) | 179 | OPCODE(BitCastF32U32, F32, U32, ) |
| 181 | OPCODE(BitCastF64U64, F64, U64, ) | 180 | OPCODE(BitCastF64U64, F64, U64, ) |
| @@ -483,7 +482,7 @@ OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, | |||
| 483 | OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) | 482 | OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) |
| 484 | OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) | 483 | OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) |
| 485 | OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) | 484 | OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) |
| 486 | OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) | 485 | OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, U1, ) |
| 487 | OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) | 486 | OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) |
| 488 | OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) | 487 | OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) |
| 489 | OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) | 488 | OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) |
| @@ -496,7 +495,7 @@ OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, | |||
| 496 | OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) | 495 | OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) |
| 497 | OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) | 496 | OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) |
| 498 | OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) | 497 | OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) |
| 499 | OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) | 498 | OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, U1, ) |
| 500 | OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) | 499 | OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) |
| 501 | OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) | 500 | OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) |
| 502 | OPCODE(BoundImageRead, U32x4, U32, Opaque, ) | 501 | OPCODE(BoundImageRead, U32x4, U32, Opaque, ) |
| @@ -509,7 +508,7 @@ OPCODE(ImageSampleDrefExplicitLod, F32, Opaq | |||
| 509 | OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) | 508 | OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) |
| 510 | OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) | 509 | OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) |
| 511 | OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) | 510 | OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) |
| 512 | OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) | 511 | OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) |
| 513 | OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) | 512 | OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) |
| 514 | OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) | 513 | OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) |
| 515 | OPCODE(ImageRead, U32x4, Opaque, Opaque, ) | 514 | OPCODE(ImageRead, U32x4, Opaque, Opaque, ) |
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 5a7c706ad..04c8c4ddb 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h | |||
| @@ -24,22 +24,21 @@ enum class Type { | |||
| 24 | U16 = 1 << 7, | 24 | U16 = 1 << 7, |
| 25 | U32 = 1 << 8, | 25 | U32 = 1 << 8, |
| 26 | U64 = 1 << 9, | 26 | U64 = 1 << 9, |
| 27 | S32 = 1 << 10, | 27 | F16 = 1 << 10, |
| 28 | F16 = 1 << 11, | 28 | F32 = 1 << 11, |
| 29 | F32 = 1 << 12, | 29 | F64 = 1 << 12, |
| 30 | F64 = 1 << 13, | 30 | U32x2 = 1 << 13, |
| 31 | U32x2 = 1 << 14, | 31 | U32x3 = 1 << 14, |
| 32 | U32x3 = 1 << 15, | 32 | U32x4 = 1 << 15, |
| 33 | U32x4 = 1 << 16, | 33 | F16x2 = 1 << 16, |
| 34 | F16x2 = 1 << 17, | 34 | F16x3 = 1 << 17, |
| 35 | F16x3 = 1 << 18, | 35 | F16x4 = 1 << 18, |
| 36 | F16x4 = 1 << 19, | 36 | F32x2 = 1 << 19, |
| 37 | F32x2 = 1 << 20, | 37 | F32x3 = 1 << 20, |
| 38 | F32x3 = 1 << 21, | 38 | F32x4 = 1 << 21, |
| 39 | F32x4 = 1 << 22, | 39 | F64x2 = 1 << 22, |
| 40 | F64x2 = 1 << 23, | 40 | F64x3 = 1 << 23, |
| 41 | F64x3 = 1 << 24, | 41 | F64x4 = 1 << 24, |
| 42 | F64x4 = 1 << 25, | ||
| 43 | }; | 42 | }; |
| 44 | DECLARE_ENUM_FLAG_OPERATORS(Type) | 43 | DECLARE_ENUM_FLAG_OPERATORS(Type) |
| 45 | 44 | ||
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 30ba12316..346169328 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp | |||
| @@ -23,8 +23,6 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} | |||
| 23 | 23 | ||
| 24 | Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} | 24 | Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} |
| 25 | 25 | ||
| 26 | Value::Value(s32 value) noexcept : type{Type::S32}, imm_s32{value} {} | ||
| 27 | |||
| 28 | Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} | 26 | Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} |
| 29 | 27 | ||
| 30 | Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} | 28 | Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} |
| @@ -71,7 +69,6 @@ bool Value::operator==(const Value& other) const { | |||
| 71 | return imm_u16 == other.imm_u16; | 69 | return imm_u16 == other.imm_u16; |
| 72 | case Type::U32: | 70 | case Type::U32: |
| 73 | case Type::F32: | 71 | case Type::F32: |
| 74 | case Type::S32: | ||
| 75 | return imm_u32 == other.imm_u32; | 72 | return imm_u32 == other.imm_u32; |
| 76 | case Type::U64: | 73 | case Type::U64: |
| 77 | case Type::F64: | 74 | case Type::F64: |
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 8b34356fd..22e89dd1b 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h | |||
| @@ -101,9 +101,8 @@ public: | |||
| 101 | TypedValue() = default; | 101 | TypedValue() = default; |
| 102 | 102 | ||
| 103 | template <IR::Type other_type> | 103 | template <IR::Type other_type> |
| 104 | requires((other_type & type_) != IR::Type::Void) explicit(false) | 104 | requires((other_type & type_) != IR::Type::Void) |
| 105 | TypedValue(const TypedValue<other_type>& value) | 105 | explicit(false) TypedValue(const TypedValue<other_type>& value) : Value(value) {} |
| 106 | : Value(value) {} | ||
| 107 | 106 | ||
| 108 | explicit TypedValue(const Value& value) : Value(value) { | 107 | explicit TypedValue(const Value& value) : Value(value) { |
| 109 | if ((value.Type() & type_) == IR::Type::Void) { | 108 | if ((value.Type() & type_) == IR::Type::Void) { |
| @@ -194,16 +193,16 @@ public: | |||
| 194 | void ReplaceOpcode(IR::Opcode opcode); | 193 | void ReplaceOpcode(IR::Opcode opcode); |
| 195 | 194 | ||
| 196 | template <typename FlagsType> | 195 | template <typename FlagsType> |
| 197 | requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) | 196 | requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) |
| 198 | [[nodiscard]] FlagsType Flags() const noexcept { | 197 | [[nodiscard]] FlagsType Flags() const noexcept { |
| 199 | FlagsType ret; | 198 | FlagsType ret; |
| 200 | std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret)); | 199 | std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret)); |
| 201 | return ret; | 200 | return ret; |
| 202 | } | 201 | } |
| 203 | 202 | ||
| 204 | template <typename FlagsType> | 203 | template <typename FlagsType> |
| 205 | requires(sizeof(FlagsType) <= sizeof(u32) && | 204 | requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) |
| 206 | std::is_trivially_copyable_v<FlagsType>) void SetFlags(FlagsType value) noexcept { | 205 | void SetFlags(FlagsType value) noexcept { |
| 207 | std::memcpy(&flags, &value, sizeof(value)); | 206 | std::memcpy(&flags, &value, sizeof(value)); |
| 208 | } | 207 | } |
| 209 | 208 | ||
| @@ -268,7 +267,6 @@ using U8 = TypedValue<Type::U8>; | |||
| 268 | using U16 = TypedValue<Type::U16>; | 267 | using U16 = TypedValue<Type::U16>; |
| 269 | using U32 = TypedValue<Type::U32>; | 268 | using U32 = TypedValue<Type::U32>; |
| 270 | using U64 = TypedValue<Type::U64>; | 269 | using U64 = TypedValue<Type::U64>; |
| 271 | using S32 = TypedValue<Type::S32>; | ||
| 272 | using F16 = TypedValue<Type::F16>; | 270 | using F16 = TypedValue<Type::F16>; |
| 273 | using F32 = TypedValue<Type::F32>; | 271 | using F32 = TypedValue<Type::F32>; |
| 274 | using F64 = TypedValue<Type::F64>; | 272 | using F64 = TypedValue<Type::F64>; |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index f8cfd4ab6..39af62559 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp | |||
| @@ -15,11 +15,13 @@ enum class Mode : u64 { | |||
| 15 | SamplePos = 5, | 15 | SamplePos = 5, |
| 16 | }; | 16 | }; |
| 17 | 17 | ||
| 18 | IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { | 18 | IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg, u64 mask) { |
| 19 | switch (mode) { | 19 | switch (mode) { |
| 20 | case Mode::Dimension: { | 20 | case Mode::Dimension: { |
| 21 | const bool needs_num_mips{((mask >> 3) & 1) != 0}; | ||
| 22 | const IR::U1 skip_mips{v.ir.Imm1(!needs_num_mips)}; | ||
| 21 | const IR::U32 lod{v.X(src_reg)}; | 23 | const IR::U32 lod{v.X(src_reg)}; |
| 22 | return v.ir.ImageQueryDimension(handle, lod); | 24 | return v.ir.ImageQueryDimension(handle, lod, skip_mips); |
| 23 | } | 25 | } |
| 24 | case Mode::TextureType: | 26 | case Mode::TextureType: |
| 25 | case Mode::SamplePos: | 27 | case Mode::SamplePos: |
| @@ -46,7 +48,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { | |||
| 46 | handle = v.X(src_reg); | 48 | handle = v.X(src_reg); |
| 47 | ++src_reg; | 49 | ++src_reg; |
| 48 | } | 50 | } |
| 49 | const IR::Value query{Query(v, handle, txq.mode, src_reg)}; | 51 | const IR::Value query{Query(v, handle, txq.mode, src_reg, txq.mask)}; |
| 50 | IR::Reg dest_reg{txq.dest_reg}; | 52 | IR::Reg dest_reg{txq.dest_reg}; |
| 51 | for (int element = 0; element < 4; ++element) { | 53 | for (int element = 0; element < 4; ++element) { |
| 52 | if (((txq.mask >> element) & 1) == 0) { | 54 | if (((txq.mask >> element) & 1) == 0) { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 3adbd2b16..a42453e90 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings( | |||
| 171 | } | 171 | } |
| 172 | return mapping; | 172 | return mapping; |
| 173 | } | 173 | } |
| 174 | |||
| 175 | void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program, | ||
| 176 | const Shader::VaryingState& passthrough_mask, | ||
| 177 | bool passthrough_position, | ||
| 178 | std::optional<IR::Attribute> passthrough_layer_attr) { | ||
| 179 | for (u32 i = 0; i < program.output_vertices; i++) { | ||
| 180 | // Assign generics from input | ||
| 181 | for (u32 j = 0; j < 32; j++) { | ||
| 182 | if (!passthrough_mask.Generic(j)) { | ||
| 183 | continue; | ||
| 184 | } | ||
| 185 | |||
| 186 | const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4); | ||
| 187 | ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); | ||
| 188 | ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); | ||
| 189 | ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); | ||
| 190 | ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); | ||
| 191 | } | ||
| 192 | |||
| 193 | if (passthrough_position) { | ||
| 194 | // Assign position from input | ||
| 195 | const IR::Attribute attr = IR::Attribute::PositionX; | ||
| 196 | ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); | ||
| 197 | ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); | ||
| 198 | ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); | ||
| 199 | ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); | ||
| 200 | } | ||
| 201 | |||
| 202 | if (passthrough_layer_attr) { | ||
| 203 | // Assign layer | ||
| 204 | ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr), | ||
| 205 | ir.Imm32(0)); | ||
| 206 | } | ||
| 207 | |||
| 208 | // Emit vertex | ||
| 209 | ir.EmitVertex(ir.Imm32(0)); | ||
| 210 | } | ||
| 211 | ir.EndPrimitive(ir.Imm32(0)); | ||
| 212 | } | ||
| 213 | |||
| 214 | u32 GetOutputTopologyVertices(OutputTopology output_topology) { | ||
| 215 | switch (output_topology) { | ||
| 216 | case OutputTopology::PointList: | ||
| 217 | return 1; | ||
| 218 | case OutputTopology::LineStrip: | ||
| 219 | return 2; | ||
| 220 | default: | ||
| 221 | return 3; | ||
| 222 | } | ||
| 223 | } | ||
| 224 | |||
| 225 | void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) { | ||
| 226 | for (IR::Block* const block : program.blocks) { | ||
| 227 | for (IR::Inst& inst : block->Instructions()) { | ||
| 228 | if (inst.GetOpcode() == IR::Opcode::Epilogue) { | ||
| 229 | IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 230 | EmitGeometryPassthrough( | ||
| 231 | ir, program, program.info.passthrough, | ||
| 232 | program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {}); | ||
| 233 | } | ||
| 234 | } | ||
| 235 | } | ||
| 236 | } | ||
| 237 | |||
| 174 | } // Anonymous namespace | 238 | } // Anonymous namespace |
| 175 | 239 | ||
| 176 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | 240 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, |
| @@ -195,9 +259,14 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 195 | program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; | 259 | program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; |
| 196 | if (program.is_geometry_passthrough) { | 260 | if (program.is_geometry_passthrough) { |
| 197 | const auto& mask{env.GpPassthroughMask()}; | 261 | const auto& mask{env.GpPassthroughMask()}; |
| 198 | for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { | 262 | for (size_t i = 0; i < mask.size() * 32; ++i) { |
| 199 | program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; | 263 | program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; |
| 200 | } | 264 | } |
| 265 | |||
| 266 | if (!host_info.support_geometry_shader_passthrough) { | ||
| 267 | program.output_vertices = GetOutputTopologyVertices(program.output_topology); | ||
| 268 | LowerGeometryPassthrough(program, host_info); | ||
| 269 | } | ||
| 201 | } | 270 | } |
| 202 | break; | 271 | break; |
| 203 | } | 272 | } |
| @@ -219,11 +288,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 219 | } | 288 | } |
| 220 | Optimization::SsaRewritePass(program); | 289 | Optimization::SsaRewritePass(program); |
| 221 | 290 | ||
| 222 | Optimization::ConstantPropagationPass(program); | 291 | Optimization::ConstantPropagationPass(env, program); |
| 223 | 292 | ||
| 224 | Optimization::PositionPass(env, program); | 293 | Optimization::PositionPass(env, program); |
| 225 | 294 | ||
| 226 | Optimization::GlobalMemoryToStorageBufferPass(program); | 295 | Optimization::GlobalMemoryToStorageBufferPass(program, host_info); |
| 227 | Optimization::TexturePass(env, program, host_info); | 296 | Optimization::TexturePass(env, program, host_info); |
| 228 | 297 | ||
| 229 | if (Settings::values.resolution_info.active) { | 298 | if (Settings::values.resolution_info.active) { |
| @@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, | |||
| 342 | IR::Program program; | 411 | IR::Program program; |
| 343 | program.stage = Stage::Geometry; | 412 | program.stage = Stage::Geometry; |
| 344 | program.output_topology = output_topology; | 413 | program.output_topology = output_topology; |
| 345 | switch (output_topology) { | 414 | program.output_vertices = GetOutputTopologyVertices(output_topology); |
| 346 | case OutputTopology::PointList: | ||
| 347 | program.output_vertices = 1; | ||
| 348 | break; | ||
| 349 | case OutputTopology::LineStrip: | ||
| 350 | program.output_vertices = 2; | ||
| 351 | break; | ||
| 352 | default: | ||
| 353 | program.output_vertices = 3; | ||
| 354 | break; | ||
| 355 | } | ||
| 356 | 415 | ||
| 357 | program.is_geometry_passthrough = false; | 416 | program.is_geometry_passthrough = false; |
| 358 | program.info.loads.mask = source_program.info.stores.mask; | 417 | program.info.loads.mask = source_program.info.stores.mask; |
| @@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, | |||
| 366 | node.data.block = current_block; | 425 | node.data.block = current_block; |
| 367 | 426 | ||
| 368 | IR::IREmitter ir{*current_block}; | 427 | IR::IREmitter ir{*current_block}; |
| 369 | for (u32 i = 0; i < program.output_vertices; i++) { | 428 | EmitGeometryPassthrough(ir, program, program.info.stores, true, |
| 370 | // Assign generics from input | 429 | source_program.info.emulated_layer); |
| 371 | for (u32 j = 0; j < 32; j++) { | ||
| 372 | if (!program.info.stores.Generic(j)) { | ||
| 373 | continue; | ||
| 374 | } | ||
| 375 | |||
| 376 | const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4); | ||
| 377 | ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); | ||
| 378 | ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); | ||
| 379 | ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); | ||
| 380 | ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); | ||
| 381 | } | ||
| 382 | |||
| 383 | // Assign position from input | ||
| 384 | const IR::Attribute attr = IR::Attribute::PositionX; | ||
| 385 | ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); | ||
| 386 | ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); | ||
| 387 | ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); | ||
| 388 | ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); | ||
| 389 | |||
| 390 | // Assign layer | ||
| 391 | ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer), | ||
| 392 | ir.Imm32(0)); | ||
| 393 | |||
| 394 | // Emit vertex | ||
| 395 | ir.EmitVertex(ir.Imm32(0)); | ||
| 396 | } | ||
| 397 | ir.EndPrimitive(ir.Imm32(0)); | ||
| 398 | 430 | ||
| 399 | IR::Block* return_block{block_pool.Create(inst_pool)}; | 431 | IR::Block* return_block{block_pool.Create(inst_pool)}; |
| 400 | IR::IREmitter{*return_block}.Epilogue(); | 432 | IR::IREmitter{*return_block}.Epilogue(); |