diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/shader_recompiler/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate_program.cpp | 6 | ||||
| -rw-r--r-- | src/shader_recompiler/host_translate_info.h | 3 | ||||
| -rw-r--r-- | src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp | 44 | ||||
| -rw-r--r-- | src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp | 185 | ||||
| -rw-r--r-- | src/shader_recompiler/ir_opt/passes.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 10 | ||||
| -rw-r--r-- | src/yuzu/main.cpp | 2 |
13 files changed, 264 insertions, 1 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 525b2363c..07e75f9d8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -216,6 +216,7 @@ add_library(shader_recompiler STATIC | |||
| 216 | frontend/maxwell/translate_program.h | 216 | frontend/maxwell/translate_program.h |
| 217 | host_translate_info.h | 217 | host_translate_info.h |
| 218 | ir_opt/collect_shader_info_pass.cpp | 218 | ir_opt/collect_shader_info_pass.cpp |
| 219 | ir_opt/conditional_barrier_pass.cpp | ||
| 219 | ir_opt/constant_propagation_pass.cpp | 220 | ir_opt/constant_propagation_pass.cpp |
| 220 | ir_opt/dead_code_elimination_pass.cpp | 221 | ir_opt/dead_code_elimination_pass.cpp |
| 221 | ir_opt/dual_vertex_pass.cpp | 222 | ir_opt/dual_vertex_pass.cpp |
| @@ -223,6 +224,7 @@ add_library(shader_recompiler STATIC | |||
| 223 | ir_opt/identity_removal_pass.cpp | 224 | ir_opt/identity_removal_pass.cpp |
| 224 | ir_opt/layer_pass.cpp | 225 | ir_opt/layer_pass.cpp |
| 225 | ir_opt/lower_fp16_to_fp32.cpp | 226 | ir_opt/lower_fp16_to_fp32.cpp |
| 227 | ir_opt/lower_fp64_to_fp32.cpp | ||
| 226 | ir_opt/lower_int64_to_int32.cpp | 228 | ir_opt/lower_int64_to_int32.cpp |
| 227 | ir_opt/passes.h | 229 | ir_opt/passes.h |
| 228 | ir_opt/position_pass.cpp | 230 | ir_opt/position_pass.cpp |
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 17a6d4888..928b35561 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -280,12 +280,18 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 280 | RemoveUnreachableBlocks(program); | 280 | RemoveUnreachableBlocks(program); |
| 281 | 281 | ||
| 282 | // Replace instructions before the SSA rewrite | 282 | // Replace instructions before the SSA rewrite |
| 283 | if (!host_info.support_float64) { | ||
| 284 | Optimization::LowerFp64ToFp32(program); | ||
| 285 | } | ||
| 283 | if (!host_info.support_float16) { | 286 | if (!host_info.support_float16) { |
| 284 | Optimization::LowerFp16ToFp32(program); | 287 | Optimization::LowerFp16ToFp32(program); |
| 285 | } | 288 | } |
| 286 | if (!host_info.support_int64) { | 289 | if (!host_info.support_int64) { |
| 287 | Optimization::LowerInt64ToInt32(program); | 290 | Optimization::LowerInt64ToInt32(program); |
| 288 | } | 291 | } |
| 292 | if (!host_info.support_conditional_barrier) { | ||
| 293 | Optimization::ConditionalBarrierPass(program); | ||
| 294 | } | ||
| 289 | Optimization::SsaRewritePass(program); | 295 | Optimization::SsaRewritePass(program); |
| 290 | 296 | ||
| 291 | Optimization::ConstantPropagationPass(env, program); | 297 | Optimization::ConstantPropagationPass(env, program); |
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 2aaa6c5ea..7d2ded907 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h | |||
| @@ -10,6 +10,7 @@ namespace Shader { | |||
| 10 | 10 | ||
| 11 | /// Misc information about the host | 11 | /// Misc information about the host |
| 12 | struct HostTranslateInfo { | 12 | struct HostTranslateInfo { |
| 13 | bool support_float64{}; ///< True when the device supports 64-bit floats | ||
| 13 | bool support_float16{}; ///< True when the device supports 16-bit floats | 14 | bool support_float16{}; ///< True when the device supports 16-bit floats |
| 14 | bool support_int64{}; ///< True when the device supports 64-bit integers | 15 | bool support_int64{}; ///< True when the device supports 64-bit integers |
| 15 | bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered | 16 | bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered |
| @@ -17,6 +18,8 @@ struct HostTranslateInfo { | |||
| 17 | bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS | 18 | bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS |
| 18 | bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry | 19 | bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry |
| 19 | ///< passthrough shaders | 20 | ///< passthrough shaders |
| 21 | bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional | ||
| 22 | ///< control flow | ||
| 20 | }; | 23 | }; |
| 21 | 24 | ||
| 22 | } // namespace Shader | 25 | } // namespace Shader |
diff --git a/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp new file mode 100644 index 000000000..c3ed27f4f --- /dev/null +++ b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 5 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 6 | |||
| 7 | namespace Shader::Optimization { | ||
| 8 | |||
| 9 | void ConditionalBarrierPass(IR::Program& program) { /* Walks program.syntax_list and turns Barrier instructions into Identity when they sit inside an If/Loop region or appear in the list after a conditional Return/Unreachable. */ | ||
| 10 | s32 conditional_control_flow_count{0}; /* current nesting depth of open If/Loop nodes */ | ||
| 11 | s32 conditional_return_count{0}; /* Return/Unreachable nodes seen while the depth was non-zero */ | ||
| 12 | for (IR::AbstractSyntaxNode& node : program.syntax_list) { | ||
| 13 | switch (node.type) { | ||
| 14 | case IR::AbstractSyntaxNode::Type::If: | ||
| 15 | case IR::AbstractSyntaxNode::Type::Loop: | ||
| 16 | conditional_control_flow_count++; | ||
| 17 | break; | ||
| 18 | case IR::AbstractSyntaxNode::Type::EndIf: | ||
| 19 | case IR::AbstractSyntaxNode::Type::Repeat: | ||
| 20 | conditional_control_flow_count--; | ||
| 21 | break; | ||
| 22 | case IR::AbstractSyntaxNode::Type::Unreachable: | ||
| 23 | case IR::AbstractSyntaxNode::Type::Return: | ||
| 24 | if (conditional_control_flow_count > 0) { | ||
| 25 | conditional_return_count++; | ||
| 26 | } | ||
| 27 | break; | ||
| 28 | case IR::AbstractSyntaxNode::Type::Block: | ||
| 29 | for (IR::Inst& inst : node.data.block->Instructions()) { | ||
| 30 | if ((conditional_control_flow_count > 0 || conditional_return_count > 0) && | ||
| 31 | inst.GetOpcode() == IR::Opcode::Barrier) { | ||
| 32 | LOG_WARNING(Shader, "Barrier within conditional control flow"); | ||
| 33 | inst.ReplaceOpcode(IR::Opcode::Identity); /* conditional_return_count is never reset, so after the first conditional return every later barrier is dropped as well */ | ||
| 34 | } | ||
| 35 | } | ||
| 36 | break; | ||
| 37 | default: | ||
| 38 | break; | ||
| 39 | } | ||
| 40 | } | ||
| 41 | ASSERT(conditional_control_flow_count == 0); /* every If/Loop must have been matched by an EndIf/Repeat */ | ||
| 42 | } | ||
| 43 | |||
| 44 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp new file mode 100644 index 000000000..5db7a38ad --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp | |||
| @@ -0,0 +1,185 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 5 | #include "shader_recompiler/frontend/ir/opcodes.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 7 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 8 | |||
| 9 | namespace Shader::Optimization { | ||
| 10 | namespace { | ||
| 11 | |||
| 12 | constexpr s32 F64ToF32Exp = +127 - 1023; /* Added to a biased f64 exponent (bias 1023) to rebias it for f32 (bias 127); must be negative, the values were previously swapped. */ | ||
| 13 | constexpr s32 F32ToF64Exp = +1023 - 127; /* Added to a biased f32 exponent (bias 127) to rebias it for f64 (bias 1023). */ | ||
| 14 | |||
| 15 | IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) { /* Emits IR that builds an f32 bit pattern from a packed f64 given as two 32-bit words (element 0 = low word, element 1 = high word) and returns it bit-cast to F32. */ | ||
| 16 | const IR::U32 lo{ir.CompositeExtract(packed, 0)}; | ||
| 17 | const IR::U32 hi{ir.CompositeExtract(packed, 1)}; | ||
| 18 | const IR::U32 sign{ir.BitFieldExtract(hi, ir.Imm32(31), ir.Imm32(1))}; | ||
| 19 | const IR::U32 exp{ir.BitFieldExtract(hi, ir.Imm32(20), ir.Imm32(11))}; /* 11-bit exponent field */ | ||
| 20 | const IR::U32 mantissa_hi{ir.BitFieldExtract(hi, ir.Imm32(0), ir.Imm32(20))}; | ||
| 21 | const IR::U32 mantissa_lo{ir.BitFieldExtract(lo, ir.Imm32(29), ir.Imm32(3))}; /* only the top 3 bits of the low word are kept; its other 29 bits are discarded */ | ||
| 22 | const IR::U32 mantissa{ | ||
| 23 | ir.BitwiseOr(ir.ShiftLeftLogical(mantissa_hi, ir.Imm32(3)), mantissa_lo)}; /* 20 + 3 = 23 mantissa bits */ | ||
| 24 | const IR::U32 exp_if_subnorm{ /* an exponent field of 0 stays 0, any other value has F64ToF32Exp added */ | ||
| 25 | ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F64ToF32Exp)))}; | ||
| 26 | const IR::U32 exp_if_infnan{ /* an all-ones f64 exponent (0x7ff) maps to the all-ones f32 exponent (0xff) */ | ||
| 27 | ir.Select(ir.IEqual(exp, ir.Imm32(0x7ff)), ir.Imm32(0xff), exp_if_subnorm)}; | ||
| 28 | const IR::U32 result{ /* NOTE(review): the adjusted exponent is not range-checked before being shifted into bits 23 and up */ | ||
| 29 | ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)), | ||
| 30 | ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(23)), mantissa))}; | ||
| 31 | return ir.BitCast<IR::F32>(result); | ||
| 32 | } | ||
| 33 | |||
| 34 | IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) { /* Emits IR that expands an f32 value into the two 32-bit words of an f64 bit pattern and returns them as a composite (low word, high word). */ | ||
| 35 | const IR::U32 value{ir.BitCast<IR::U32>(IR::F32(raw))}; | ||
| 36 | const IR::U32 sign{ir.BitFieldExtract(value, ir.Imm32(31), ir.Imm32(1))}; | ||
| 37 | const IR::U32 exp{ir.BitFieldExtract(value, ir.Imm32(23), ir.Imm32(8))}; /* 8-bit exponent field */ | ||
| 38 | const IR::U32 mantissa{ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(23))}; | ||
| 39 | const IR::U32 mantissa_hi{ir.BitFieldExtract(mantissa, ir.Imm32(3), ir.Imm32(20))}; /* upper 20 mantissa bits, stored in the high word */ | ||
| 40 | const IR::U32 mantissa_lo{ir.BitFieldExtract(mantissa, ir.Imm32(0), ir.Imm32(3))}; /* lowest 3 mantissa bits, stored at the top of the low word */ | ||
| 41 | const IR::U32 exp_if_subnorm{ /* an exponent field of 0 stays 0, any other value has F32ToF64Exp added */ | ||
| 42 | ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F32ToF64Exp)))}; | ||
| 43 | const IR::U32 exp_if_infnan{ /* an all-ones f32 exponent (0xff) maps to the all-ones f64 exponent (0x7ff) */ | ||
| 44 | ir.Select(ir.IEqual(exp, ir.Imm32(0xff)), ir.Imm32(0x7ff), exp_if_subnorm)}; | ||
| 45 | const IR::U32 lo{ir.ShiftLeftLogical(mantissa_lo, ir.Imm32(29))}; /* remaining 29 low bits are zero */ | ||
| 46 | const IR::U32 hi{ | ||
| 47 | ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)), | ||
| 48 | ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(20)), mantissa_hi))}; | ||
| 49 | return ir.CompositeConstruct(lo, hi); | ||
| 50 | } | ||
| 51 | |||
| 52 | IR::Opcode Replace(IR::Opcode op) { /* Maps a 64-bit float opcode to the opcode that replaces it after lowering; any opcode without an entry is returned unchanged. */ | ||
| 53 | switch (op) { | ||
| 54 | case IR::Opcode::FPAbs64: | ||
| 55 | return IR::Opcode::FPAbs32; | ||
| 56 | case IR::Opcode::FPAdd64: | ||
| 57 | return IR::Opcode::FPAdd32; | ||
| 58 | case IR::Opcode::FPCeil64: | ||
| 59 | return IR::Opcode::FPCeil32; | ||
| 60 | case IR::Opcode::FPFloor64: | ||
| 61 | return IR::Opcode::FPFloor32; | ||
| 62 | case IR::Opcode::FPFma64: | ||
| 63 | return IR::Opcode::FPFma32; | ||
| 64 | case IR::Opcode::FPMul64: | ||
| 65 | return IR::Opcode::FPMul32; | ||
| 66 | case IR::Opcode::FPNeg64: | ||
| 67 | return IR::Opcode::FPNeg32; | ||
| 68 | case IR::Opcode::FPRoundEven64: | ||
| 69 | return IR::Opcode::FPRoundEven32; | ||
| 70 | case IR::Opcode::FPSaturate64: | ||
| 71 | return IR::Opcode::FPSaturate32; | ||
| 72 | case IR::Opcode::FPClamp64: | ||
| 73 | return IR::Opcode::FPClamp32; | ||
| 74 | case IR::Opcode::FPTrunc64: | ||
| 75 | return IR::Opcode::FPTrunc32; | ||
| 76 | case IR::Opcode::CompositeConstructF64x2: | ||
| 77 | return IR::Opcode::CompositeConstructF32x2; | ||
| 78 | case IR::Opcode::CompositeConstructF64x3: | ||
| 79 | return IR::Opcode::CompositeConstructF32x3; | ||
| 80 | case IR::Opcode::CompositeConstructF64x4: | ||
| 81 | return IR::Opcode::CompositeConstructF32x4; | ||
| 82 | case IR::Opcode::CompositeExtractF64x2: | ||
| 83 | return IR::Opcode::CompositeExtractF32x2; | ||
| 84 | case IR::Opcode::CompositeExtractF64x3: | ||
| 85 | return IR::Opcode::CompositeExtractF32x3; | ||
| 86 | case IR::Opcode::CompositeExtractF64x4: | ||
| 87 | return IR::Opcode::CompositeExtractF32x4; | ||
| 88 | case IR::Opcode::CompositeInsertF64x2: | ||
| 89 | return IR::Opcode::CompositeInsertF32x2; | ||
| 90 | case IR::Opcode::CompositeInsertF64x3: | ||
| 91 | return IR::Opcode::CompositeInsertF32x3; | ||
| 92 | case IR::Opcode::CompositeInsertF64x4: | ||
| 93 | return IR::Opcode::CompositeInsertF32x4; | ||
| 94 | case IR::Opcode::FPOrdEqual64: | ||
| 95 | return IR::Opcode::FPOrdEqual32; | ||
| 96 | case IR::Opcode::FPUnordEqual64: | ||
| 97 | return IR::Opcode::FPUnordEqual32; | ||
| 98 | case IR::Opcode::FPOrdNotEqual64: | ||
| 99 | return IR::Opcode::FPOrdNotEqual32; | ||
| 100 | case IR::Opcode::FPUnordNotEqual64: | ||
| 101 | return IR::Opcode::FPUnordNotEqual32; | ||
| 102 | case IR::Opcode::FPOrdLessThan64: | ||
| 103 | return IR::Opcode::FPOrdLessThan32; | ||
| 104 | case IR::Opcode::FPUnordLessThan64: | ||
| 105 | return IR::Opcode::FPUnordLessThan32; | ||
| 106 | case IR::Opcode::FPOrdGreaterThan64: | ||
| 107 | return IR::Opcode::FPOrdGreaterThan32; | ||
| 108 | case IR::Opcode::FPUnordGreaterThan64: | ||
| 109 | return IR::Opcode::FPUnordGreaterThan32; | ||
| 110 | case IR::Opcode::FPOrdLessThanEqual64: | ||
| 111 | return IR::Opcode::FPOrdLessThanEqual32; | ||
| 112 | case IR::Opcode::FPUnordLessThanEqual64: | ||
| 113 | return IR::Opcode::FPUnordLessThanEqual32; | ||
| 114 | case IR::Opcode::FPOrdGreaterThanEqual64: | ||
| 115 | return IR::Opcode::FPOrdGreaterThanEqual32; | ||
| 116 | case IR::Opcode::FPUnordGreaterThanEqual64: | ||
| 117 | return IR::Opcode::FPUnordGreaterThanEqual32; | ||
| 118 | case IR::Opcode::FPIsNan64: | ||
| 119 | return IR::Opcode::FPIsNan32; | ||
| 120 | case IR::Opcode::ConvertS16F64: | ||
| 121 | return IR::Opcode::ConvertS16F32; | ||
| 122 | case IR::Opcode::ConvertS32F64: | ||
| 123 | return IR::Opcode::ConvertS32F32; | ||
| 124 | case IR::Opcode::ConvertS64F64: | ||
| 125 | return IR::Opcode::ConvertS64F32; | ||
| 126 | case IR::Opcode::ConvertU16F64: | ||
| 127 | return IR::Opcode::ConvertU16F32; | ||
| 128 | case IR::Opcode::ConvertU32F64: | ||
| 129 | return IR::Opcode::ConvertU32F32; | ||
| 130 | case IR::Opcode::ConvertU64F64: | ||
| 131 | return IR::Opcode::ConvertU64F32; | ||
| 132 | case IR::Opcode::ConvertF32F64: | ||
| 133 | return IR::Opcode::Identity; /* conversions between F32 and F64 become Identity in both directions */ | ||
| 134 | case IR::Opcode::ConvertF64F32: | ||
| 135 | return IR::Opcode::Identity; | ||
| 136 | case IR::Opcode::ConvertF64S8: | ||
| 137 | return IR::Opcode::ConvertF32S8; | ||
| 138 | case IR::Opcode::ConvertF64S16: | ||
| 139 | return IR::Opcode::ConvertF32S16; | ||
| 140 | case IR::Opcode::ConvertF64S32: | ||
| 141 | return IR::Opcode::ConvertF32S32; | ||
| 142 | case IR::Opcode::ConvertF64S64: | ||
| 143 | return IR::Opcode::ConvertF32S64; | ||
| 144 | case IR::Opcode::ConvertF64U8: | ||
| 145 | return IR::Opcode::ConvertF32U8; | ||
| 146 | case IR::Opcode::ConvertF64U16: | ||
| 147 | return IR::Opcode::ConvertF32U16; | ||
| 148 | case IR::Opcode::ConvertF64U32: | ||
| 149 | return IR::Opcode::ConvertF32U32; | ||
| 150 | case IR::Opcode::ConvertF64U64: | ||
| 151 | return IR::Opcode::ConvertF32U64; | ||
| 152 | default: | ||
| 153 | return op; /* not an FP64 opcode handled here: leave it as is */ | ||
| 154 | } | ||
| 155 | } | ||
| 156 | |||
| 157 | void Lower(IR::Block& block, IR::Inst& inst) { /* Lowers a single instruction: PackDouble2x32/UnpackDouble2x32 have their uses redirected to freshly emitted 32-bit IR, every other instruction only has its opcode passed through Replace(). */ | ||
| 158 | switch (inst.GetOpcode()) { | ||
| 159 | case IR::Opcode::PackDouble2x32: { | ||
| 160 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); /* emitter is constructed with an iterator to inst - presumably the insertion point, confirm against IREmitter */ | ||
| 161 | inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0))); /* users of the packed double now read the rebuilt f32 */ | ||
| 162 | break; | ||
| 163 | } | ||
| 164 | case IR::Opcode::UnpackDouble2x32: { | ||
| 165 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 166 | inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0))); /* users of the unpacked words now read the synthesised word pair */ | ||
| 167 | break; | ||
| 168 | } | ||
| 169 | default: | ||
| 170 | inst.ReplaceOpcode(Replace(inst.GetOpcode())); /* Replace() returns the same opcode for anything it does not map */ | ||
| 171 | break; | ||
| 172 | } | ||
| 173 | } | ||
| 174 | |||
| 175 | } // Anonymous namespace | ||
| 176 | |||
| 177 | void LowerFp64ToFp32(IR::Program& program) { /* Pass entry point: applies Lower() to every instruction of every block in program.blocks. */ | ||
| 178 | for (IR::Block* const block : program.blocks) { | ||
| 179 | for (IR::Inst& inst : block->Instructions()) { | ||
| 180 | Lower(*block, inst); | ||
| 181 | } | ||
| 182 | } | ||
| 183 | } | ||
| 184 | |||
| 185 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 1f8f2ba95..629d18fa1 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -13,10 +13,12 @@ struct HostTranslateInfo; | |||
| 13 | namespace Shader::Optimization { | 13 | namespace Shader::Optimization { |
| 14 | 14 | ||
| 15 | void CollectShaderInfoPass(Environment& env, IR::Program& program); | 15 | void CollectShaderInfoPass(Environment& env, IR::Program& program); |
| 16 | void ConditionalBarrierPass(IR::Program& program); | ||
| 16 | void ConstantPropagationPass(Environment& env, IR::Program& program); | 17 | void ConstantPropagationPass(Environment& env, IR::Program& program); |
| 17 | void DeadCodeEliminationPass(IR::Program& program); | 18 | void DeadCodeEliminationPass(IR::Program& program); |
| 18 | void GlobalMemoryToStorageBufferPass(IR::Program& program); | 19 | void GlobalMemoryToStorageBufferPass(IR::Program& program); |
| 19 | void IdentityRemovalPass(IR::Program& program); | 20 | void IdentityRemovalPass(IR::Program& program); |
| 21 | void LowerFp64ToFp32(IR::Program& program); | ||
| 20 | void LowerFp16ToFp32(IR::Program& program); | 22 | void LowerFp16ToFp32(IR::Program& program); |
| 21 | void LowerInt64ToInt32(IR::Program& program); | 23 | void LowerInt64ToInt32(IR::Program& program); |
| 22 | void RescalingPass(IR::Program& program); | 24 | void RescalingPass(IR::Program& program); |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 400c21981..03d234f2f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -201,6 +201,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { | |||
| 201 | use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && | 201 | use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && |
| 202 | !(is_amd || (is_intel && !is_linux)) && !strict_context_required; | 202 | !(is_amd || (is_intel && !is_linux)) && !strict_context_required; |
| 203 | use_driver_cache = is_nvidia; | 203 | use_driver_cache = is_nvidia; |
| 204 | supports_conditional_barriers = !is_intel; | ||
| 204 | 205 | ||
| 205 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); | 206 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); |
| 206 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); | 207 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index cc0b95f1a..ad27264e5 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -188,6 +188,10 @@ public: | |||
| 188 | return strict_context_required; | 188 | return strict_context_required; |
| 189 | } | 189 | } |
| 190 | 190 | ||
| 191 | bool SupportsConditionalBarriers() const { /* Returns the supports_conditional_barriers member. */ | ||
| 192 | return supports_conditional_barriers; | ||
| 193 | } | ||
| 194 | |||
| 191 | private: | 195 | private: |
| 192 | static bool TestVariableAoffi(); | 196 | static bool TestVariableAoffi(); |
| 193 | static bool TestPreciseBug(); | 197 | static bool TestPreciseBug(); |
| @@ -233,6 +237,7 @@ private: | |||
| 233 | bool has_bool_ref_bug{}; | 237 | bool has_bool_ref_bug{}; |
| 234 | bool can_report_memory{}; | 238 | bool can_report_memory{}; |
| 235 | bool strict_context_required{}; | 239 | bool strict_context_required{}; |
| 240 | bool supports_conditional_barriers{}; | ||
| 236 | 241 | ||
| 237 | std::string vendor_name; | 242 | std::string vendor_name; |
| 238 | }; | 243 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6ecda2984..3f077311e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -232,12 +232,14 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 232 | .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), | 232 | .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), |
| 233 | }, | 233 | }, |
| 234 | host_info{ | 234 | host_info{ |
| 235 | .support_float64 = true, | ||
| 235 | .support_float16 = false, | 236 | .support_float16 = false, |
| 236 | .support_int64 = device.HasShaderInt64(), | 237 | .support_int64 = device.HasShaderInt64(), |
| 237 | .needs_demote_reorder = device.IsAmd(), | 238 | .needs_demote_reorder = device.IsAmd(), |
| 238 | .support_snorm_render_buffer = false, | 239 | .support_snorm_render_buffer = false, |
| 239 | .support_viewport_index_layer = device.HasVertexViewportLayer(), | 240 | .support_viewport_index_layer = device.HasVertexViewportLayer(), |
| 240 | .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), | 241 | .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), |
| 242 | .support_conditional_barrier = device.SupportsConditionalBarriers(), | ||
| 241 | } { | 243 | } { |
| 242 | if (use_asynchronous_shaders) { | 244 | if (use_asynchronous_shaders) { |
| 243 | workers = CreateWorkers(); | 245 | workers = CreateWorkers(); |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9482e91b0..5734f51e5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -350,6 +350,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 350 | .has_broken_spirv_subgroup_mask_vector_extract_dynamic = | 350 | .has_broken_spirv_subgroup_mask_vector_extract_dynamic = |
| 351 | driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; | 351 | driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; |
| 352 | host_info = Shader::HostTranslateInfo{ | 352 | host_info = Shader::HostTranslateInfo{ |
| 353 | .support_float64 = device.IsFloat64Supported(), | ||
| 353 | .support_float16 = device.IsFloat16Supported(), | 354 | .support_float16 = device.IsFloat16Supported(), |
| 354 | .support_int64 = device.IsShaderInt64Supported(), | 355 | .support_int64 = device.IsShaderInt64Supported(), |
| 355 | .needs_demote_reorder = | 356 | .needs_demote_reorder = |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0158b6b0d..a46f9beed 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -386,6 +386,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 386 | IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, | 386 | IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, |
| 387 | VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal); | 387 | VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal); |
| 388 | 388 | ||
| 389 | supports_conditional_barriers = !(is_intel_anv || is_intel_windows); | ||
| 390 | |||
| 389 | CollectPhysicalMemoryInfo(); | 391 | CollectPhysicalMemoryInfo(); |
| 390 | CollectToolingInfo(); | 392 | CollectToolingInfo(); |
| 391 | 393 | ||
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index d62a103a1..f314d0ffe 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -300,6 +300,11 @@ public: | |||
| 300 | return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY; | 300 | return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY; |
| 301 | } | 301 | } |
| 302 | 302 | ||
| 303 | /// Returns true if the device supports float64 natively. | ||
| 304 | bool IsFloat64Supported() const { /* Reports the shaderFloat64 field of the stored device features. */ | ||
| 305 | return features.features.shaderFloat64; | ||
| 306 | } | ||
| 307 | |||
| 303 | /// Returns true if the device supports float16 natively. | 308 | /// Returns true if the device supports float16 natively. |
| 304 | bool IsFloat16Supported() const { | 309 | bool IsFloat16Supported() const { |
| 305 | return features.shader_float16_int8.shaderFloat16; | 310 | return features.shader_float16_int8.shaderFloat16; |
| @@ -580,6 +585,10 @@ public: | |||
| 580 | return properties.properties.limits.maxVertexInputBindings; | 585 | return properties.properties.limits.maxVertexInputBindings; |
| 581 | } | 586 | } |
| 582 | 587 | ||
| 588 | bool SupportsConditionalBarriers() const { /* Returns the supports_conditional_barriers member. */ | ||
| 589 | return supports_conditional_barriers; | ||
| 590 | } | ||
| 591 | |||
| 583 | private: | 592 | private: |
| 584 | /// Checks if the physical device is suitable and configures the object state | 593 | /// Checks if the physical device is suitable and configures the object state |
| 585 | /// with all necessary info about its properties. | 594 | /// with all necessary info about its properties. |
| @@ -683,6 +692,7 @@ private: | |||
| 683 | bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. | 692 | bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. |
| 684 | bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. | 693 | bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. |
| 685 | bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. | 694 | bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. |
| 695 | bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. | ||
| 686 | u64 device_access_memory{}; ///< Total size of device local memory in bytes. | 696 | u64 device_access_memory{}; ///< Total size of device local memory in bytes. |
| 687 | u32 sets_per_pool{}; ///< Sets per Description Pool | 697 | u32 sets_per_pool{}; ///< Sets per Description Pool |
| 688 | 698 | ||
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 9d06b21b6..013715b44 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -3067,7 +3067,7 @@ InstallResult GMainWindow::InstallNSPXCI(const QString& filename) { | |||
| 3067 | return false; | 3067 | return false; |
| 3068 | } | 3068 | } |
| 3069 | 3069 | ||
| 3070 | std::array<u8, 0x1000> buffer{}; | 3070 | std::vector<u8> buffer(1_MiB); |
| 3071 | 3071 | ||
| 3072 | for (std::size_t i = 0; i < src->GetSize(); i += buffer.size()) { | 3072 | for (std::size_t i = 0; i < src->GetSize(); i += buffer.size()) { |
| 3073 | if (install_progress->wasCanceled()) { | 3073 | if (install_progress->wasCanceled()) { |