diff options
| author | 2021-02-20 03:30:13 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:22 -0400 | |
| commit | e2bc05b17d91854cbb9c0ce3647141bf7d33143e (patch) | |
| tree | 96769db006b6015cd536483db98ee0697aee4992 | |
| parent | spirv: Add lower fp16 to fp32 pass (diff) | |
| download | yuzu-e2bc05b17d91854cbb9c0ce3647141bf7d33143e.tar.gz yuzu-e2bc05b17d91854cbb9c0ce3647141bf7d33143e.tar.xz yuzu-e2bc05b17d91854cbb9c0ce3647141bf7d33143e.zip | |
shader: Add denorm flush support
Diffstat (limited to '')
20 files changed, 260 insertions, 93 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6047f3ebe..fbd4ec6dc 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -32,6 +32,7 @@ add_library(shader_recompiler STATIC | |||
| 32 | frontend/ir/ir_emitter.h | 32 | frontend/ir/ir_emitter.h |
| 33 | frontend/ir/microinstruction.cpp | 33 | frontend/ir/microinstruction.cpp |
| 34 | frontend/ir/microinstruction.h | 34 | frontend/ir/microinstruction.h |
| 35 | frontend/ir/modifiers.h | ||
| 35 | frontend/ir/opcodes.cpp | 36 | frontend/ir/opcodes.cpp |
| 36 | frontend/ir/opcodes.h | 37 | frontend/ir/opcodes.h |
| 37 | frontend/ir/opcodes.inc | 38 | frontend/ir/opcodes.inc |
| @@ -94,9 +95,7 @@ add_library(shader_recompiler STATIC | |||
| 94 | shader_info.h | 95 | shader_info.h |
| 95 | ) | 96 | ) |
| 96 | 97 | ||
| 97 | target_include_directories(shader_recompiler PRIVATE sirit) | 98 | target_link_libraries(shader_recompiler PUBLIC fmt::fmt sirit) |
| 98 | target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) | ||
| 99 | target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit) | ||
| 100 | 99 | ||
| 101 | add_executable(shader_util main.cpp) | 100 | add_executable(shader_util main.cpp) |
| 102 | target_link_libraries(shader_util PRIVATE shader_recompiler) | 101 | target_link_libraries(shader_util PRIVATE shader_recompiler) |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 4ce07c281..2519e446a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp | |||
| @@ -14,8 +14,6 @@ | |||
| 14 | #include "shader_recompiler/frontend/ir/microinstruction.h" | 14 | #include "shader_recompiler/frontend/ir/microinstruction.h" |
| 15 | #include "shader_recompiler/frontend/ir/program.h" | 15 | #include "shader_recompiler/frontend/ir/program.h" |
| 16 | 16 | ||
| 17 | #pragma optimize("", off) | ||
| 18 | |||
| 19 | namespace Shader::Backend::SPIRV { | 17 | namespace Shader::Backend::SPIRV { |
| 20 | namespace { | 18 | namespace { |
| 21 | template <class Func> | 19 | template <class Func> |
| @@ -113,9 +111,61 @@ Id TypeId(const EmitContext& ctx, IR::Type type) { | |||
| 113 | throw NotImplementedException("Phi node type {}", type); | 111 | throw NotImplementedException("Phi node type {}", type); |
| 114 | } | 112 | } |
| 115 | } | 113 | } |
| 114 | |||
| 115 | void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, | ||
| 116 | Id main_func) { | ||
| 117 | if (!profile.support_float_controls) { | ||
| 118 | return; | ||
| 119 | } | ||
| 120 | const Info& info{program.info}; | ||
| 121 | if (!info.uses_fp32_denorms_flush && !info.uses_fp32_denorms_preserve && | ||
| 122 | !info.uses_fp16_denorms_flush && !info.uses_fp16_denorms_preserve) { | ||
| 123 | return; | ||
| 124 | } | ||
| 125 | ctx.AddExtension("SPV_KHR_float_controls"); | ||
| 126 | |||
| 127 | if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { | ||
| 128 | // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); | ||
| 129 | } else if (info.uses_fp32_denorms_flush) { | ||
| 130 | if (profile.support_fp32_denorm_flush) { | ||
| 131 | ctx.AddCapability(spv::Capability::DenormFlushToZero); | ||
| 132 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); | ||
| 133 | } else { | ||
| 134 | // Drivers will most likely flush denorms by default, no need to warn | ||
| 135 | } | ||
| 136 | } else if (info.uses_fp32_denorms_preserve) { | ||
| 137 | if (profile.support_fp32_denorm_preserve) { | ||
| 138 | ctx.AddCapability(spv::Capability::DenormPreserve); | ||
| 139 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); | ||
| 140 | } else { | ||
| 141 | // LOG_WARNING(HW_GPU, "Fp32 denorm preserve used in shader without host support"); | ||
| 142 | } | ||
| 143 | } | ||
| 144 | if (!profile.support_separate_denorm_behavior) { | ||
| 145 | // No separate denorm behavior | ||
| 146 | return; | ||
| 147 | } | ||
| 148 | if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { | ||
| 149 | // LOG_ERROR(HW_GPU, "Fp16 denorm flush and preserve on the same shader"); | ||
| 150 | } else if (info.uses_fp16_denorms_flush) { | ||
| 151 | if (profile.support_fp16_denorm_flush) { | ||
| 152 | ctx.AddCapability(spv::Capability::DenormFlushToZero); | ||
| 153 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); | ||
| 154 | } else { | ||
| 155 | // Same as fp32, no need to warn as most drivers will flush by default | ||
| 156 | } | ||
| 157 | } else if (info.uses_fp32_denorms_preserve) { | ||
| 158 | if (profile.support_fp16_denorm_preserve) { | ||
| 159 | ctx.AddCapability(spv::Capability::DenormPreserve); | ||
| 160 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); | ||
| 161 | } else { | ||
| 162 | // LOG_WARNING(HW_GPU, "Fp16 denorm preserve used in shader without host support"); | ||
| 163 | } | ||
| 164 | } | ||
| 165 | } | ||
| 116 | } // Anonymous namespace | 166 | } // Anonymous namespace |
| 117 | 167 | ||
| 118 | std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) { | 168 | std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { |
| 119 | EmitContext ctx{program}; | 169 | EmitContext ctx{program}; |
| 120 | const Id void_function{ctx.TypeFunction(ctx.void_id)}; | 170 | const Id void_function{ctx.TypeFunction(ctx.void_id)}; |
| 121 | // FIXME: Forward declare functions (needs sirit support) | 171 | // FIXME: Forward declare functions (needs sirit support) |
| @@ -131,10 +181,11 @@ std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) { | |||
| 131 | ctx.OpFunctionEnd(); | 181 | ctx.OpFunctionEnd(); |
| 132 | } | 182 | } |
| 133 | boost::container::small_vector<Id, 32> interfaces; | 183 | boost::container::small_vector<Id, 32> interfaces; |
| 134 | if (program.info.uses_workgroup_id) { | 184 | const Info& info{program.info}; |
| 185 | if (info.uses_workgroup_id) { | ||
| 135 | interfaces.push_back(ctx.workgroup_id); | 186 | interfaces.push_back(ctx.workgroup_id); |
| 136 | } | 187 | } |
| 137 | if (program.info.uses_local_invocation_id) { | 188 | if (info.uses_local_invocation_id) { |
| 138 | interfaces.push_back(ctx.local_invocation_id); | 189 | interfaces.push_back(ctx.local_invocation_id); |
| 139 | } | 190 | } |
| 140 | const std::span interfaces_span(interfaces.data(), interfaces.size()); | 191 | const std::span interfaces_span(interfaces.data(), interfaces.size()); |
| @@ -144,6 +195,8 @@ std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) { | |||
| 144 | ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], | 195 | ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], |
| 145 | workgroup_size[2]); | 196 | workgroup_size[2]); |
| 146 | 197 | ||
| 198 | SetupDenormControl(profile, program, ctx, func); | ||
| 199 | |||
| 147 | return ctx.Assemble(); | 200 | return ctx.Assemble(); |
| 148 | } | 201 | } |
| 149 | 202 | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 2b59c0b72..de624a151 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -11,10 +11,12 @@ | |||
| 11 | #include "shader_recompiler/environment.h" | 11 | #include "shader_recompiler/environment.h" |
| 12 | #include "shader_recompiler/frontend/ir/microinstruction.h" | 12 | #include "shader_recompiler/frontend/ir/microinstruction.h" |
| 13 | #include "shader_recompiler/frontend/ir/program.h" | 13 | #include "shader_recompiler/frontend/ir/program.h" |
| 14 | #include "shader_recompiler/profile.h" | ||
| 14 | 15 | ||
| 15 | namespace Shader::Backend::SPIRV { | 16 | namespace Shader::Backend::SPIRV { |
| 16 | 17 | ||
| 17 | [[nodiscard]] std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program); | 18 | [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, |
| 19 | IR::Program& program); | ||
| 18 | 20 | ||
| 19 | // Microinstruction emitters | 21 | // Microinstruction emitters |
| 20 | Id EmitPhi(EmitContext& ctx, IR::Inst* inst); | 22 | Id EmitPhi(EmitContext& ctx, IR::Inst* inst); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 9ef180531..c9687de37 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp | |||
| @@ -13,7 +13,10 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { | |||
| 13 | ctx.Decorate(op, spv::Decoration::NoContraction); | 13 | ctx.Decorate(op, spv::Decoration::NoContraction); |
| 14 | } | 14 | } |
| 15 | switch (flags.rounding) { | 15 | switch (flags.rounding) { |
| 16 | case IR::FpRounding::DontCare: | ||
| 17 | break; | ||
| 16 | case IR::FpRounding::RN: | 18 | case IR::FpRounding::RN: |
| 19 | ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE); | ||
| 17 | break; | 20 | break; |
| 18 | case IR::FpRounding::RM: | 21 | case IR::FpRounding::RM: |
| 19 | ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); | 22 | ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); |
| @@ -25,9 +28,6 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { | |||
| 25 | ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); | 28 | ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); |
| 26 | break; | 29 | break; |
| 27 | } | 30 | } |
| 28 | if (flags.fmz_mode != IR::FmzMode::FTZ) { | ||
| 29 | throw NotImplementedException("Denorm management not implemented"); | ||
| 30 | } | ||
| 31 | return op; | 31 | return op; |
| 32 | } | 32 | } |
| 33 | 33 | ||
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 559ab9cca..8f120a2f6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -558,53 +558,53 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { | |||
| 558 | } | 558 | } |
| 559 | } | 559 | } |
| 560 | 560 | ||
| 561 | F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { | 561 | F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { |
| 562 | switch (value.Type()) { | 562 | switch (value.Type()) { |
| 563 | case Type::F16: | 563 | case Type::F16: |
| 564 | return Inst<F16>(Opcode::FPRoundEven16, value); | 564 | return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value); |
| 565 | case Type::F32: | 565 | case Type::F32: |
| 566 | return Inst<F32>(Opcode::FPRoundEven32, value); | 566 | return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value); |
| 567 | case Type::F64: | 567 | case Type::F64: |
| 568 | return Inst<F64>(Opcode::FPRoundEven64, value); | 568 | return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value); |
| 569 | default: | 569 | default: |
| 570 | ThrowInvalidType(value.Type()); | 570 | ThrowInvalidType(value.Type()); |
| 571 | } | 571 | } |
| 572 | } | 572 | } |
| 573 | 573 | ||
| 574 | F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { | 574 | F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { |
| 575 | switch (value.Type()) { | 575 | switch (value.Type()) { |
| 576 | case Type::F16: | 576 | case Type::F16: |
| 577 | return Inst<F16>(Opcode::FPFloor16, value); | 577 | return Inst<F16>(Opcode::FPFloor16, Flags{control}, value); |
| 578 | case Type::F32: | 578 | case Type::F32: |
| 579 | return Inst<F32>(Opcode::FPFloor32, value); | 579 | return Inst<F32>(Opcode::FPFloor32, Flags{control}, value); |
| 580 | case Type::F64: | 580 | case Type::F64: |
| 581 | return Inst<F64>(Opcode::FPFloor64, value); | 581 | return Inst<F64>(Opcode::FPFloor64, Flags{control}, value); |
| 582 | default: | 582 | default: |
| 583 | ThrowInvalidType(value.Type()); | 583 | ThrowInvalidType(value.Type()); |
| 584 | } | 584 | } |
| 585 | } | 585 | } |
| 586 | 586 | ||
| 587 | F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { | 587 | F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { |
| 588 | switch (value.Type()) { | 588 | switch (value.Type()) { |
| 589 | case Type::F16: | 589 | case Type::F16: |
| 590 | return Inst<F16>(Opcode::FPCeil16, value); | 590 | return Inst<F16>(Opcode::FPCeil16, Flags{control}, value); |
| 591 | case Type::F32: | 591 | case Type::F32: |
| 592 | return Inst<F32>(Opcode::FPCeil32, value); | 592 | return Inst<F32>(Opcode::FPCeil32, Flags{control}, value); |
| 593 | case Type::F64: | 593 | case Type::F64: |
| 594 | return Inst<F64>(Opcode::FPCeil64, value); | 594 | return Inst<F64>(Opcode::FPCeil64, Flags{control}, value); |
| 595 | default: | 595 | default: |
| 596 | ThrowInvalidType(value.Type()); | 596 | ThrowInvalidType(value.Type()); |
| 597 | } | 597 | } |
| 598 | } | 598 | } |
| 599 | 599 | ||
| 600 | F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { | 600 | F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { |
| 601 | switch (value.Type()) { | 601 | switch (value.Type()) { |
| 602 | case Type::F16: | 602 | case Type::F16: |
| 603 | return Inst<F16>(Opcode::FPTrunc16, value); | 603 | return Inst<F16>(Opcode::FPTrunc16, Flags{control}, value); |
| 604 | case Type::F32: | 604 | case Type::F32: |
| 605 | return Inst<F32>(Opcode::FPTrunc32, value); | 605 | return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value); |
| 606 | case Type::F64: | 606 | case Type::F64: |
| 607 | return Inst<F64>(Opcode::FPTrunc64, value); | 607 | return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value); |
| 608 | default: | 608 | default: |
| 609 | ThrowInvalidType(value.Type()); | 609 | ThrowInvalidType(value.Type()); |
| 610 | } | 610 | } |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 24b012a39..959f4f9da 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -129,10 +129,10 @@ public: | |||
| 129 | [[nodiscard]] F32 FPSinNotReduced(const F32& value); | 129 | [[nodiscard]] F32 FPSinNotReduced(const F32& value); |
| 130 | [[nodiscard]] F32 FPSqrt(const F32& value); | 130 | [[nodiscard]] F32 FPSqrt(const F32& value); |
| 131 | [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); | 131 | [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); |
| 132 | [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value); | 132 | [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); |
| 133 | [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value); | 133 | [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); |
| 134 | [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value); | 134 | [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); |
| 135 | [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value); | 135 | [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); |
| 136 | 136 | ||
| 137 | [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); | 137 | [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); |
| 138 | [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); | 138 | [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); |
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index c288eede0..44652eae7 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h | |||
| @@ -4,25 +4,30 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 7 | namespace Shader::IR { | 9 | namespace Shader::IR { |
| 8 | 10 | ||
| 9 | enum class FmzMode : u8 { | 11 | enum class FmzMode : u8 { |
| 10 | None, // Denorms are not flushed, NAN is propagated (nouveau) | 12 | DontCare, // Not specified for this instruction |
| 11 | FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) | 13 | FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) |
| 12 | FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) | 14 | FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) |
| 15 | None, // Denorms are not flushed, NAN is propagated (nouveau) | ||
| 13 | }; | 16 | }; |
| 14 | 17 | ||
| 15 | enum class FpRounding : u8 { | 18 | enum class FpRounding : u8 { |
| 16 | RN, // Round to nearest even, | 19 | DontCare, // Not specified for this instruction |
| 17 | RM, // Round towards negative infinity | 20 | RN, // Round to nearest even, |
| 18 | RP, // Round towards positive infinity | 21 | RM, // Round towards negative infinity |
| 19 | RZ, // Round towards zero | 22 | RP, // Round towards positive infinity |
| 23 | RZ, // Round towards zero | ||
| 20 | }; | 24 | }; |
| 21 | 25 | ||
| 22 | struct FpControl { | 26 | struct FpControl { |
| 23 | bool no_contraction{false}; | 27 | bool no_contraction{false}; |
| 24 | FpRounding rounding{FpRounding::RN}; | 28 | FpRounding rounding{FpRounding::DontCare}; |
| 25 | FmzMode fmz_mode{FmzMode::FTZ}; | 29 | FmzMode fmz_mode{FmzMode::DontCare}; |
| 26 | }; | 30 | }; |
| 27 | static_assert(sizeof(FpControl) <= sizeof(u32)); | 31 | static_assert(sizeof(FpControl) <= sizeof(u32)); |
| 32 | |||
| 28 | } // namespace Shader::IR | 33 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index ae2d37405..4d82a0009 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp | |||
| @@ -81,17 +81,28 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { | |||
| 81 | // F2I is used to convert from a floating point value to an integer | 81 | // F2I is used to convert from a floating point value to an integer |
| 82 | const F2I f2i{insn}; | 82 | const F2I f2i{insn}; |
| 83 | 83 | ||
| 84 | const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && | ||
| 85 | f2i.dest_format != DestFormat::I64}; | ||
| 86 | IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; | ||
| 87 | if (denorm_cares) { | ||
| 88 | fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; | ||
| 89 | } | ||
| 90 | const IR::FpControl fp_control{ | ||
| 91 | .no_contraction{true}, | ||
| 92 | .rounding{IR::FpRounding::DontCare}, | ||
| 93 | .fmz_mode{fmz_mode}, | ||
| 94 | }; | ||
| 84 | const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; | 95 | const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; |
| 85 | const IR::F16F32F64 rounded_value{[&] { | 96 | const IR::F16F32F64 rounded_value{[&] { |
| 86 | switch (f2i.rounding) { | 97 | switch (f2i.rounding) { |
| 87 | case Rounding::Round: | 98 | case Rounding::Round: |
| 88 | return v.ir.FPRoundEven(op_a); | 99 | return v.ir.FPRoundEven(op_a, fp_control); |
| 89 | case Rounding::Floor: | 100 | case Rounding::Floor: |
| 90 | return v.ir.FPFloor(op_a); | 101 | return v.ir.FPFloor(op_a, fp_control); |
| 91 | case Rounding::Ceil: | 102 | case Rounding::Ceil: |
| 92 | return v.ir.FPCeil(op_a); | 103 | return v.ir.FPCeil(op_a, fp_control); |
| 93 | case Rounding::Trunc: | 104 | case Rounding::Trunc: |
| 94 | return v.ir.FPTrunc(op_a); | 105 | return v.ir.FPTrunc(op_a, fp_control); |
| 95 | default: | 106 | default: |
| 96 | throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); | 107 | throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); |
| 97 | } | 108 | } |
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f7f102f53..6662ef4cd 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -2,23 +2,28 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "shader_recompiler/frontend/ir/microinstruction.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 5 | #include "shader_recompiler/frontend/ir/program.h" | 7 | #include "shader_recompiler/frontend/ir/program.h" |
| 6 | #include "shader_recompiler/shader_info.h" | 8 | #include "shader_recompiler/shader_info.h" |
| 7 | 9 | ||
| 8 | namespace Shader::Optimization { | 10 | namespace Shader::Optimization { |
| 9 | namespace { | 11 | namespace { |
| 10 | void AddConstantBufferDescriptor(Info& info, u32 index) { | 12 | void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { |
| 11 | auto& descriptor{info.constant_buffers.at(index)}; | 13 | if (count != 1) { |
| 12 | if (descriptor) { | 14 | throw NotImplementedException("Constant buffer descriptor indexing"); |
| 15 | } | ||
| 16 | if ((info.constant_buffer_mask & (1U << index)) != 0) { | ||
| 13 | return; | 17 | return; |
| 14 | } | 18 | } |
| 15 | descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{ | 19 | info.constant_buffer_mask |= 1U << index; |
| 20 | info.constant_buffer_descriptors.push_back({ | ||
| 16 | .index{index}, | 21 | .index{index}, |
| 17 | .count{1}, | 22 | .count{1}, |
| 18 | }); | 23 | }); |
| 19 | } | 24 | } |
| 20 | 25 | ||
| 21 | void Visit(Info& info, IR::Inst& inst) { | 26 | void VisitUsages(Info& info, IR::Inst& inst) { |
| 22 | switch (inst.Opcode()) { | 27 | switch (inst.Opcode()) { |
| 23 | case IR::Opcode::WorkgroupId: | 28 | case IR::Opcode::WorkgroupId: |
| 24 | info.uses_workgroup_id = true; | 29 | info.uses_workgroup_id = true; |
| @@ -72,7 +77,7 @@ void Visit(Info& info, IR::Inst& inst) { | |||
| 72 | break; | 77 | break; |
| 73 | case IR::Opcode::GetCbuf: | 78 | case IR::Opcode::GetCbuf: |
| 74 | if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { | 79 | if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { |
| 75 | AddConstantBufferDescriptor(info, index.U32()); | 80 | AddConstantBufferDescriptor(info, index.U32(), 1); |
| 76 | } else { | 81 | } else { |
| 77 | throw NotImplementedException("Constant buffer with non-immediate index"); | 82 | throw NotImplementedException("Constant buffer with non-immediate index"); |
| 78 | } | 83 | } |
| @@ -81,6 +86,60 @@ void Visit(Info& info, IR::Inst& inst) { | |||
| 81 | break; | 86 | break; |
| 82 | } | 87 | } |
| 83 | } | 88 | } |
| 89 | |||
| 90 | void VisitFpModifiers(Info& info, IR::Inst& inst) { | ||
| 91 | switch (inst.Opcode()) { | ||
| 92 | case IR::Opcode::FPAdd16: | ||
| 93 | case IR::Opcode::FPFma16: | ||
| 94 | case IR::Opcode::FPMul16: | ||
| 95 | case IR::Opcode::FPRoundEven16: | ||
| 96 | case IR::Opcode::FPFloor16: | ||
| 97 | case IR::Opcode::FPCeil16: | ||
| 98 | case IR::Opcode::FPTrunc16: { | ||
| 99 | const auto control{inst.Flags<IR::FpControl>()}; | ||
| 100 | switch (control.fmz_mode) { | ||
| 101 | case IR::FmzMode::DontCare: | ||
| 102 | break; | ||
| 103 | case IR::FmzMode::FTZ: | ||
| 104 | case IR::FmzMode::FMZ: | ||
| 105 | info.uses_fp16_denorms_flush = true; | ||
| 106 | break; | ||
| 107 | case IR::FmzMode::None: | ||
| 108 | info.uses_fp16_denorms_preserve = true; | ||
| 109 | break; | ||
| 110 | } | ||
| 111 | break; | ||
| 112 | } | ||
| 113 | case IR::Opcode::FPAdd32: | ||
| 114 | case IR::Opcode::FPFma32: | ||
| 115 | case IR::Opcode::FPMul32: | ||
| 116 | case IR::Opcode::FPRoundEven32: | ||
| 117 | case IR::Opcode::FPFloor32: | ||
| 118 | case IR::Opcode::FPCeil32: | ||
| 119 | case IR::Opcode::FPTrunc32: { | ||
| 120 | const auto control{inst.Flags<IR::FpControl>()}; | ||
| 121 | switch (control.fmz_mode) { | ||
| 122 | case IR::FmzMode::DontCare: | ||
| 123 | break; | ||
| 124 | case IR::FmzMode::FTZ: | ||
| 125 | case IR::FmzMode::FMZ: | ||
| 126 | info.uses_fp32_denorms_flush = true; | ||
| 127 | break; | ||
| 128 | case IR::FmzMode::None: | ||
| 129 | info.uses_fp32_denorms_preserve = true; | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | default: | ||
| 135 | break; | ||
| 136 | } | ||
| 137 | } | ||
| 138 | |||
| 139 | void Visit(Info& info, IR::Inst& inst) { | ||
| 140 | VisitUsages(info, inst); | ||
| 141 | VisitFpModifiers(info, inst); | ||
| 142 | } | ||
| 84 | } // Anonymous namespace | 143 | } // Anonymous namespace |
| 85 | 144 | ||
| 86 | void CollectShaderInfoPass(IR::Program& program) { | 145 | void CollectShaderInfoPass(IR::Program& program) { |
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index bf230a850..03bd547b7 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -351,7 +351,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | |||
| 351 | .cbuf_offset{storage_buffer.offset}, | 351 | .cbuf_offset{storage_buffer.offset}, |
| 352 | .count{1}, | 352 | .count{1}, |
| 353 | }); | 353 | }); |
| 354 | info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); | ||
| 355 | ++storage_index; | 354 | ++storage_index; |
| 356 | } | 355 | } |
| 357 | for (const StorageInst& storage_inst : to_replace) { | 356 | for (const StorageInst& storage_inst : to_replace) { |
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index abd44e323..72565f477 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp | |||
| @@ -60,6 +60,17 @@ void RunDatabase() { | |||
| 60 | fmt::print(stdout, "{} ms", duration_cast<milliseconds>(t - t0).count() / double(N)); | 60 | fmt::print(stdout, "{} ms", duration_cast<milliseconds>(t - t0).count() / double(N)); |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | static constexpr Profile PROFILE{ | ||
| 64 | .unified_descriptor_binding = true, | ||
| 65 | .support_float_controls = true, | ||
| 66 | .support_separate_denorm_behavior = true, | ||
| 67 | .support_separate_rounding_mode = true, | ||
| 68 | .support_fp16_denorm_preserve = true, | ||
| 69 | .support_fp32_denorm_preserve = true, | ||
| 70 | .support_fp16_denorm_flush = true, | ||
| 71 | .support_fp32_denorm_flush = true, | ||
| 72 | }; | ||
| 73 | |||
| 63 | int main() { | 74 | int main() { |
| 64 | // RunDatabase(); | 75 | // RunDatabase(); |
| 65 | 76 | ||
| @@ -76,7 +87,7 @@ int main() { | |||
| 76 | fmt::print(stdout, "{}\n", cfg.Dot()); | 87 | fmt::print(stdout, "{}\n", cfg.Dot()); |
| 77 | IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; | 88 | IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; |
| 78 | fmt::print(stdout, "{}\n", IR::DumpProgram(program)); | 89 | fmt::print(stdout, "{}\n", IR::DumpProgram(program)); |
| 79 | const std::vector<u32> spirv{Backend::SPIRV::EmitSPIRV(env, program)}; | 90 | const std::vector<u32> spirv{Backend::SPIRV::EmitSPIRV(PROFILE, env, program)}; |
| 80 | std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; | 91 | std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; |
| 81 | std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); | 92 | std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); |
| 82 | std::fclose(file); | 93 | std::fclose(file); |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c96d783b7..9881bebab 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -7,7 +7,14 @@ | |||
| 7 | namespace Shader { | 7 | namespace Shader { |
| 8 | 8 | ||
| 9 | struct Profile { | 9 | struct Profile { |
| 10 | bool unified_descriptor_binding; | 10 | bool unified_descriptor_binding{}; |
| 11 | bool support_float_controls{}; | ||
| 12 | bool support_separate_denorm_behavior{}; | ||
| 13 | bool support_separate_rounding_mode{}; | ||
| 14 | bool support_fp16_denorm_preserve{}; | ||
| 15 | bool support_fp32_denorm_preserve{}; | ||
| 16 | bool support_fp16_denorm_flush{}; | ||
| 17 | bool support_fp32_denorm_flush{}; | ||
| 11 | }; | 18 | }; |
| 12 | 19 | ||
| 13 | } // namespace Shader | 20 | } // namespace Shader |
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index b25081e39..527e19c27 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp | |||
| @@ -14,14 +14,15 @@ | |||
| 14 | 14 | ||
| 15 | namespace Shader { | 15 | namespace Shader { |
| 16 | 16 | ||
| 17 | std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address) { | 17 | std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, Environment& env, |
| 18 | u32 start_address) { | ||
| 18 | ObjectPool<Maxwell::Flow::Block> flow_block_pool; | 19 | ObjectPool<Maxwell::Flow::Block> flow_block_pool; |
| 19 | ObjectPool<IR::Inst> inst_pool; | 20 | ObjectPool<IR::Inst> inst_pool; |
| 20 | ObjectPool<IR::Block> block_pool; | 21 | ObjectPool<IR::Block> block_pool; |
| 21 | 22 | ||
| 22 | Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; | 23 | Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; |
| 23 | IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; | 24 | IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; |
| 24 | return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; | 25 | return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)}; |
| 25 | } | 26 | } |
| 26 | 27 | ||
| 27 | } // namespace Shader | 28 | } // namespace Shader |
diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 4cb973878..2529463ae 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h | |||
| @@ -9,10 +9,12 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "shader_recompiler/environment.h" | 11 | #include "shader_recompiler/environment.h" |
| 12 | #include "shader_recompiler/profile.h" | ||
| 12 | #include "shader_recompiler/shader_info.h" | 13 | #include "shader_recompiler/shader_info.h" |
| 13 | 14 | ||
| 14 | namespace Shader { | 15 | namespace Shader { |
| 15 | 16 | ||
| 16 | [[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address); | 17 | [[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, |
| 18 | Environment& env, u32 start_address); | ||
| 17 | 19 | ||
| 18 | } // namespace Shader | 20 | } // namespace Shader |
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f49a79368..8766bf13e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -31,14 +31,15 @@ struct Info { | |||
| 31 | bool uses_local_invocation_id{}; | 31 | bool uses_local_invocation_id{}; |
| 32 | bool uses_fp16{}; | 32 | bool uses_fp16{}; |
| 33 | bool uses_fp64{}; | 33 | bool uses_fp64{}; |
| 34 | bool uses_fp16_denorms_flush{}; | ||
| 35 | bool uses_fp16_denorms_preserve{}; | ||
| 36 | bool uses_fp32_denorms_flush{}; | ||
| 37 | bool uses_fp32_denorms_preserve{}; | ||
| 34 | 38 | ||
| 35 | u32 constant_buffer_mask{}; | 39 | u32 constant_buffer_mask{}; |
| 36 | 40 | ||
| 37 | std::array<ConstantBufferDescriptor*, MAX_CBUFS> constant_buffers{}; | ||
| 38 | boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> | 41 | boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> |
| 39 | constant_buffer_descriptors; | 42 | constant_buffer_descriptors; |
| 40 | |||
| 41 | std::array<StorageBufferDescriptor*, MAX_SSBOS> storage_buffers{}; | ||
| 42 | boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; | 43 | boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; |
| 43 | }; | 44 | }; |
| 44 | 45 | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 588ce6139..a658a3276 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -131,12 +131,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip | |||
| 131 | })} {} | 131 | })} {} |
| 132 | 132 | ||
| 133 | void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { | 133 | void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { |
| 134 | u32 enabled_uniforms{}; | 134 | buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); |
| 135 | for (const auto& desc : info.constant_buffer_descriptors) { | ||
| 136 | enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; | ||
| 137 | } | ||
| 138 | buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); | ||
| 139 | |||
| 140 | buffer_cache.UnbindComputeStorageBuffers(); | 135 | buffer_cache.UnbindComputeStorageBuffers(); |
| 141 | size_t index{}; | 136 | size_t index{}; |
| 142 | for (const auto& desc : info.storage_buffers_descriptors) { | 137 | for (const auto& desc : info.storage_buffers_descriptors) { |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c2a41a360..49ff911d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -177,7 +177,20 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { | |||
| 177 | if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) { | 177 | if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) { |
| 178 | // TODO: Load from cache | 178 | // TODO: Load from cache |
| 179 | } | 179 | } |
| 180 | const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; | 180 | const auto& float_control{device.FloatControlProperties()}; |
| 181 | const Shader::Profile profile{ | ||
| 182 | .unified_descriptor_binding = true, | ||
| 183 | .support_float_controls = true, | ||
| 184 | .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == | ||
| 185 | VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | ||
| 186 | .support_separate_rounding_mode = | ||
| 187 | float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | ||
| 188 | .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, | ||
| 189 | .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, | ||
| 190 | .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, | ||
| 191 | .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, | ||
| 192 | }; | ||
| 193 | const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; | ||
| 181 | 194 | ||
| 182 | FILE* file = fopen("D:\\shader.spv", "wb"); | 195 | FILE* file = fopen("D:\\shader.spv", "wb"); |
| 183 | fwrite(code.data(), 4, code.size(), file); | 196 | fwrite(code.data(), 4, code.size(), file); |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 85f903125..4887d6fd9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -43,6 +43,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ | |||
| 43 | VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, | 43 | VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, |
| 44 | VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, | 44 | VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, |
| 45 | VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, | 45 | VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, |
| 46 | VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, | ||
| 46 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, | 47 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, |
| 47 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, | 48 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, |
| 48 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | 49 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, |
| @@ -200,6 +201,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 200 | CheckSuitability(surface != nullptr); | 201 | CheckSuitability(surface != nullptr); |
| 201 | SetupFamilies(surface); | 202 | SetupFamilies(surface); |
| 202 | SetupFeatures(); | 203 | SetupFeatures(); |
| 204 | SetupProperties(); | ||
| 203 | 205 | ||
| 204 | const auto queue_cis = GetDeviceQueueCreateInfos(); | 206 | const auto queue_cis = GetDeviceQueueCreateInfos(); |
| 205 | const std::vector extensions = LoadExtensions(surface != nullptr); | 207 | const std::vector extensions = LoadExtensions(surface != nullptr); |
| @@ -426,8 +428,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 426 | 428 | ||
| 427 | graphics_queue = logical.GetQueue(graphics_family); | 429 | graphics_queue = logical.GetQueue(graphics_family); |
| 428 | present_queue = logical.GetQueue(present_family); | 430 | present_queue = logical.GetQueue(present_family); |
| 429 | |||
| 430 | use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); | ||
| 431 | } | 431 | } |
| 432 | 432 | ||
| 433 | Device::~Device() = default; | 433 | Device::~Device() = default; |
| @@ -600,7 +600,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { | |||
| 600 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; | 600 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; |
| 601 | robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; | 601 | robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; |
| 602 | 602 | ||
| 603 | VkPhysicalDeviceFeatures2 features2{}; | 603 | VkPhysicalDeviceFeatures2KHR features2{}; |
| 604 | features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; | 604 | features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; |
| 605 | features2.pNext = &robustness2; | 605 | features2.pNext = &robustness2; |
| 606 | 606 | ||
| @@ -684,7 +684,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 684 | true); | 684 | true); |
| 685 | } | 685 | } |
| 686 | } | 686 | } |
| 687 | VkPhysicalDeviceFeatures2KHR features; | 687 | VkPhysicalDeviceFeatures2KHR features{}; |
| 688 | features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; | 688 | features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; |
| 689 | 689 | ||
| 690 | VkPhysicalDeviceProperties2KHR physical_properties; | 690 | VkPhysicalDeviceProperties2KHR physical_properties; |
| @@ -806,11 +806,21 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { | |||
| 806 | } | 806 | } |
| 807 | 807 | ||
| 808 | void Device::SetupFeatures() { | 808 | void Device::SetupFeatures() { |
| 809 | const auto supported_features{physical.GetFeatures()}; | 809 | const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; |
| 810 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; | 810 | is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; |
| 811 | is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; | 811 | is_shader_storage_image_multisample = features.shaderStorageImageMultisample; |
| 812 | is_blit_depth_stencil_supported = TestDepthStencilBlits(); | 812 | is_blit_depth_stencil_supported = TestDepthStencilBlits(); |
| 813 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); | 813 | is_optimal_astc_supported = IsOptimalAstcSupported(features); |
| 814 | } | ||
| 815 | |||
| 816 | void Device::SetupProperties() { | ||
| 817 | float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; | ||
| 818 | |||
| 819 | VkPhysicalDeviceProperties2KHR properties2{}; | ||
| 820 | properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; | ||
| 821 | properties2.pNext = &float_controls; | ||
| 822 | |||
| 823 | physical.GetProperties2KHR(properties2); | ||
| 814 | } | 824 | } |
| 815 | 825 | ||
| 816 | void Device::CollectTelemetryParameters() { | 826 | void Device::CollectTelemetryParameters() { |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 96c0f8c60..82bccc8f0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -128,6 +128,11 @@ public: | |||
| 128 | return properties.limits.maxComputeSharedMemorySize; | 128 | return properties.limits.maxComputeSharedMemorySize; |
| 129 | } | 129 | } |
| 130 | 130 | ||
| 131 | /// Returns float control properties of the device. | ||
| 132 | const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { | ||
| 133 | return float_controls; | ||
| 134 | } | ||
| 135 | |||
| 131 | /// Returns true if ASTC is natively supported. | 136 | /// Returns true if ASTC is natively supported. |
| 132 | bool IsOptimalAstcSupported() const { | 137 | bool IsOptimalAstcSupported() const { |
| 133 | return is_optimal_astc_supported; | 138 | return is_optimal_astc_supported; |
| @@ -223,11 +228,6 @@ public: | |||
| 223 | return reported_extensions; | 228 | return reported_extensions; |
| 224 | } | 229 | } |
| 225 | 230 | ||
| 226 | /// Returns true if the setting for async shader compilation is enabled. | ||
| 227 | bool UseAsynchronousShaders() const { | ||
| 228 | return use_asynchronous_shaders; | ||
| 229 | } | ||
| 230 | |||
| 231 | u64 GetDeviceLocalMemory() const { | 231 | u64 GetDeviceLocalMemory() const { |
| 232 | return device_access_memory; | 232 | return device_access_memory; |
| 233 | } | 233 | } |
| @@ -245,6 +245,9 @@ private: | |||
| 245 | /// Sets up device features. | 245 | /// Sets up device features. |
| 246 | void SetupFeatures(); | 246 | void SetupFeatures(); |
| 247 | 247 | ||
| 248 | /// Sets up device properties. | ||
| 249 | void SetupProperties(); | ||
| 250 | |||
| 248 | /// Collects telemetry information from the device. | 251 | /// Collects telemetry information from the device. |
| 249 | void CollectTelemetryParameters(); | 252 | void CollectTelemetryParameters(); |
| 250 | 253 | ||
| @@ -267,14 +270,15 @@ private: | |||
| 267 | bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | 270 | bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, |
| 268 | FormatType format_type) const; | 271 | FormatType format_type) const; |
| 269 | 272 | ||
| 270 | VkInstance instance; ///< Vulkan instance. | 273 | VkInstance instance; ///< Vulkan instance. |
| 271 | vk::DeviceDispatch dld; ///< Device function pointers. | 274 | vk::DeviceDispatch dld; ///< Device function pointers. |
| 272 | vk::PhysicalDevice physical; ///< Physical device. | 275 | vk::PhysicalDevice physical; ///< Physical device. |
| 273 | VkPhysicalDeviceProperties properties; ///< Device properties. | 276 | VkPhysicalDeviceProperties properties; ///< Device properties. |
| 274 | vk::Device logical; ///< Logical device. | 277 | VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. |
| 275 | vk::Queue graphics_queue; ///< Main graphics queue. | 278 | vk::Device logical; ///< Logical device. |
| 276 | vk::Queue present_queue; ///< Main present queue. | 279 | vk::Queue graphics_queue; ///< Main graphics queue. |
| 277 | u32 instance_version{}; ///< Vulkan onstance version. | 280 | vk::Queue present_queue; ///< Main present queue. |
| 281 | u32 instance_version{}; ///< Vulkan onstance version. | ||
| 278 | u32 graphics_family{}; ///< Main graphics queue family index. | 282 | u32 graphics_family{}; ///< Main graphics queue family index. |
| 279 | u32 present_family{}; ///< Main present queue family index. | 283 | u32 present_family{}; ///< Main present queue family index. |
| 280 | VkDriverIdKHR driver_id{}; ///< Driver ID. | 284 | VkDriverIdKHR driver_id{}; ///< Driver ID. |
| @@ -301,9 +305,6 @@ private: | |||
| 301 | bool has_renderdoc{}; ///< Has RenderDoc attached | 305 | bool has_renderdoc{}; ///< Has RenderDoc attached |
| 302 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached | 306 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached |
| 303 | 307 | ||
| 304 | // Asynchronous Graphics Pipeline setting | ||
| 305 | bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline | ||
| 306 | |||
| 307 | // Telemetry parameters | 308 | // Telemetry parameters |
| 308 | std::string vendor_name; ///< Device's driver name. | 309 | std::string vendor_name; ///< Device's driver name. |
| 309 | std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. | 310 | std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2aa0ffbe6..33fb74bfb 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -311,8 +311,6 @@ const char* ToString(VkResult result) noexcept { | |||
| 311 | return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; | 311 | return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; |
| 312 | case VkResult::VK_ERROR_UNKNOWN: | 312 | case VkResult::VK_ERROR_UNKNOWN: |
| 313 | return "VK_ERROR_UNKNOWN"; | 313 | return "VK_ERROR_UNKNOWN"; |
| 314 | case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: | ||
| 315 | return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; | ||
| 316 | case VkResult::VK_THREAD_IDLE_KHR: | 314 | case VkResult::VK_THREAD_IDLE_KHR: |
| 317 | return "VK_THREAD_IDLE_KHR"; | 315 | return "VK_THREAD_IDLE_KHR"; |
| 318 | case VkResult::VK_THREAD_DONE_KHR: | 316 | case VkResult::VK_THREAD_DONE_KHR: |