summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/shader_recompiler/CMakeLists.txt1
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp3
-rw-r--r--src/shader_recompiler/host_translate_info.h1
-rw-r--r--src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp185
-rw-r--r--src/shader_recompiler/ir_opt/passes.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp1
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h5
8 files changed, 198 insertions, 0 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 525b2363c..03f69c191 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -223,6 +223,7 @@ add_library(shader_recompiler STATIC
223 ir_opt/identity_removal_pass.cpp 223 ir_opt/identity_removal_pass.cpp
224 ir_opt/layer_pass.cpp 224 ir_opt/layer_pass.cpp
225 ir_opt/lower_fp16_to_fp32.cpp 225 ir_opt/lower_fp16_to_fp32.cpp
226 ir_opt/lower_fp64_to_fp32.cpp
226 ir_opt/lower_int64_to_int32.cpp 227 ir_opt/lower_int64_to_int32.cpp
227 ir_opt/passes.h 228 ir_opt/passes.h
228 ir_opt/position_pass.cpp 229 ir_opt/position_pass.cpp
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 17a6d4888..00d00e9f5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -280,6 +280,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
280 RemoveUnreachableBlocks(program); 280 RemoveUnreachableBlocks(program);
281 281
282 // Replace instructions before the SSA rewrite 282 // Replace instructions before the SSA rewrite
283 if (!host_info.support_float64) {
284 Optimization::LowerFp64ToFp32(program);
285 }
283 if (!host_info.support_float16) { 286 if (!host_info.support_float16) {
284 Optimization::LowerFp16ToFp32(program); 287 Optimization::LowerFp16ToFp32(program);
285 } 288 }
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 2aaa6c5ea..4c6322904 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -10,6 +10,7 @@ namespace Shader {
10 10
11/// Misc information about the host 11/// Misc information about the host
12struct HostTranslateInfo { 12struct HostTranslateInfo {
13 bool support_float64{}; ///< True when the device supports 64-bit floats
13 bool support_float16{}; ///< True when the device supports 16-bit floats 14 bool support_float16{}; ///< True when the device supports 16-bit floats
14 bool support_int64{}; ///< True when the device supports 64-bit integers 15 bool support_int64{}; ///< True when the device supports 64-bit integers
15 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered 16 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
diff --git a/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
new file mode 100644
index 000000000..5db7a38ad
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
@@ -0,0 +1,185 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "shader_recompiler/frontend/ir/ir_emitter.h"
5#include "shader_recompiler/frontend/ir/opcodes.h"
6#include "shader_recompiler/frontend/ir/value.h"
7#include "shader_recompiler/ir_opt/passes.h"
8
9namespace Shader::Optimization {
10namespace {
11
12constexpr s32 F64ToF32Exp = +1023 - 127;
13constexpr s32 F32ToF64Exp = +127 - 1023;
14
15IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) {
16 const IR::U32 lo{ir.CompositeExtract(packed, 0)};
17 const IR::U32 hi{ir.CompositeExtract(packed, 1)};
18 const IR::U32 sign{ir.BitFieldExtract(hi, ir.Imm32(31), ir.Imm32(1))};
19 const IR::U32 exp{ir.BitFieldExtract(hi, ir.Imm32(20), ir.Imm32(11))};
20 const IR::U32 mantissa_hi{ir.BitFieldExtract(hi, ir.Imm32(0), ir.Imm32(20))};
21 const IR::U32 mantissa_lo{ir.BitFieldExtract(lo, ir.Imm32(29), ir.Imm32(3))};
22 const IR::U32 mantissa{
23 ir.BitwiseOr(ir.ShiftLeftLogical(mantissa_hi, ir.Imm32(3)), mantissa_lo)};
24 const IR::U32 exp_if_subnorm{
25 ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F64ToF32Exp)))};
26 const IR::U32 exp_if_infnan{
27 ir.Select(ir.IEqual(exp, ir.Imm32(0x7ff)), ir.Imm32(0xff), exp_if_subnorm)};
28 const IR::U32 result{
29 ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)),
30 ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(23)), mantissa))};
31 return ir.BitCast<IR::F32>(result);
32}
33
34IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) {
35 const IR::U32 value{ir.BitCast<IR::U32>(IR::F32(raw))};
36 const IR::U32 sign{ir.BitFieldExtract(value, ir.Imm32(31), ir.Imm32(1))};
37 const IR::U32 exp{ir.BitFieldExtract(value, ir.Imm32(23), ir.Imm32(8))};
38 const IR::U32 mantissa{ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(23))};
39 const IR::U32 mantissa_hi{ir.BitFieldExtract(mantissa, ir.Imm32(3), ir.Imm32(20))};
40 const IR::U32 mantissa_lo{ir.BitFieldExtract(mantissa, ir.Imm32(0), ir.Imm32(3))};
41 const IR::U32 exp_if_subnorm{
42 ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F32ToF64Exp)))};
43 const IR::U32 exp_if_infnan{
44 ir.Select(ir.IEqual(exp, ir.Imm32(0xff)), ir.Imm32(0x7ff), exp_if_subnorm)};
45 const IR::U32 lo{ir.ShiftLeftLogical(mantissa_lo, ir.Imm32(29))};
46 const IR::U32 hi{
47 ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)),
48 ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(20)), mantissa_hi))};
49 return ir.CompositeConstruct(lo, hi);
50}
51
52IR::Opcode Replace(IR::Opcode op) {
53 switch (op) {
54 case IR::Opcode::FPAbs64:
55 return IR::Opcode::FPAbs32;
56 case IR::Opcode::FPAdd64:
57 return IR::Opcode::FPAdd32;
58 case IR::Opcode::FPCeil64:
59 return IR::Opcode::FPCeil32;
60 case IR::Opcode::FPFloor64:
61 return IR::Opcode::FPFloor32;
62 case IR::Opcode::FPFma64:
63 return IR::Opcode::FPFma32;
64 case IR::Opcode::FPMul64:
65 return IR::Opcode::FPMul32;
66 case IR::Opcode::FPNeg64:
67 return IR::Opcode::FPNeg32;
68 case IR::Opcode::FPRoundEven64:
69 return IR::Opcode::FPRoundEven32;
70 case IR::Opcode::FPSaturate64:
71 return IR::Opcode::FPSaturate32;
72 case IR::Opcode::FPClamp64:
73 return IR::Opcode::FPClamp32;
74 case IR::Opcode::FPTrunc64:
75 return IR::Opcode::FPTrunc32;
76 case IR::Opcode::CompositeConstructF64x2:
77 return IR::Opcode::CompositeConstructF32x2;
78 case IR::Opcode::CompositeConstructF64x3:
79 return IR::Opcode::CompositeConstructF32x3;
80 case IR::Opcode::CompositeConstructF64x4:
81 return IR::Opcode::CompositeConstructF32x4;
82 case IR::Opcode::CompositeExtractF64x2:
83 return IR::Opcode::CompositeExtractF32x2;
84 case IR::Opcode::CompositeExtractF64x3:
85 return IR::Opcode::CompositeExtractF32x3;
86 case IR::Opcode::CompositeExtractF64x4:
87 return IR::Opcode::CompositeExtractF32x4;
88 case IR::Opcode::CompositeInsertF64x2:
89 return IR::Opcode::CompositeInsertF32x2;
90 case IR::Opcode::CompositeInsertF64x3:
91 return IR::Opcode::CompositeInsertF32x3;
92 case IR::Opcode::CompositeInsertF64x4:
93 return IR::Opcode::CompositeInsertF32x4;
94 case IR::Opcode::FPOrdEqual64:
95 return IR::Opcode::FPOrdEqual32;
96 case IR::Opcode::FPUnordEqual64:
97 return IR::Opcode::FPUnordEqual32;
98 case IR::Opcode::FPOrdNotEqual64:
99 return IR::Opcode::FPOrdNotEqual32;
100 case IR::Opcode::FPUnordNotEqual64:
101 return IR::Opcode::FPUnordNotEqual32;
102 case IR::Opcode::FPOrdLessThan64:
103 return IR::Opcode::FPOrdLessThan32;
104 case IR::Opcode::FPUnordLessThan64:
105 return IR::Opcode::FPUnordLessThan32;
106 case IR::Opcode::FPOrdGreaterThan64:
107 return IR::Opcode::FPOrdGreaterThan32;
108 case IR::Opcode::FPUnordGreaterThan64:
109 return IR::Opcode::FPUnordGreaterThan32;
110 case IR::Opcode::FPOrdLessThanEqual64:
111 return IR::Opcode::FPOrdLessThanEqual32;
112 case IR::Opcode::FPUnordLessThanEqual64:
113 return IR::Opcode::FPUnordLessThanEqual32;
114 case IR::Opcode::FPOrdGreaterThanEqual64:
115 return IR::Opcode::FPOrdGreaterThanEqual32;
116 case IR::Opcode::FPUnordGreaterThanEqual64:
117 return IR::Opcode::FPUnordGreaterThanEqual32;
118 case IR::Opcode::FPIsNan64:
119 return IR::Opcode::FPIsNan32;
120 case IR::Opcode::ConvertS16F64:
121 return IR::Opcode::ConvertS16F32;
122 case IR::Opcode::ConvertS32F64:
123 return IR::Opcode::ConvertS32F32;
124 case IR::Opcode::ConvertS64F64:
125 return IR::Opcode::ConvertS64F32;
126 case IR::Opcode::ConvertU16F64:
127 return IR::Opcode::ConvertU16F32;
128 case IR::Opcode::ConvertU32F64:
129 return IR::Opcode::ConvertU32F32;
130 case IR::Opcode::ConvertU64F64:
131 return IR::Opcode::ConvertU64F32;
132 case IR::Opcode::ConvertF32F64:
133 return IR::Opcode::Identity;
134 case IR::Opcode::ConvertF64F32:
135 return IR::Opcode::Identity;
136 case IR::Opcode::ConvertF64S8:
137 return IR::Opcode::ConvertF32S8;
138 case IR::Opcode::ConvertF64S16:
139 return IR::Opcode::ConvertF32S16;
140 case IR::Opcode::ConvertF64S32:
141 return IR::Opcode::ConvertF32S32;
142 case IR::Opcode::ConvertF64S64:
143 return IR::Opcode::ConvertF32S64;
144 case IR::Opcode::ConvertF64U8:
145 return IR::Opcode::ConvertF32U8;
146 case IR::Opcode::ConvertF64U16:
147 return IR::Opcode::ConvertF32U16;
148 case IR::Opcode::ConvertF64U32:
149 return IR::Opcode::ConvertF32U32;
150 case IR::Opcode::ConvertF64U64:
151 return IR::Opcode::ConvertF32U64;
152 default:
153 return op;
154 }
155}
156
157void Lower(IR::Block& block, IR::Inst& inst) {
158 switch (inst.GetOpcode()) {
159 case IR::Opcode::PackDouble2x32: {
160 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
161 inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0)));
162 break;
163 }
164 case IR::Opcode::UnpackDouble2x32: {
165 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
166 inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0)));
167 break;
168 }
169 default:
170 inst.ReplaceOpcode(Replace(inst.GetOpcode()));
171 break;
172 }
173}
174
175} // Anonymous namespace
176
177void LowerFp64ToFp32(IR::Program& program) {
178 for (IR::Block* const block : program.blocks) {
179 for (IR::Inst& inst : block->Instructions()) {
180 Lower(*block, inst);
181 }
182 }
183}
184
185} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 1f8f2ba95..53606b78d 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -17,6 +17,7 @@ void ConstantPropagationPass(Environment& env, IR::Program& program);
17void DeadCodeEliminationPass(IR::Program& program); 17void DeadCodeEliminationPass(IR::Program& program);
18void GlobalMemoryToStorageBufferPass(IR::Program& program); 18void GlobalMemoryToStorageBufferPass(IR::Program& program);
19void IdentityRemovalPass(IR::Program& program); 19void IdentityRemovalPass(IR::Program& program);
20void LowerFp64ToFp32(IR::Program& program);
20void LowerFp16ToFp32(IR::Program& program); 21void LowerFp16ToFp32(IR::Program& program);
21void LowerInt64ToInt32(IR::Program& program); 22void LowerInt64ToInt32(IR::Program& program);
22void RescalingPass(IR::Program& program); 23void RescalingPass(IR::Program& program);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 6ecda2984..dd8caa556 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -232,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
232 .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), 232 .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
233 }, 233 },
234 host_info{ 234 host_info{
235 .support_float64 = true,
235 .support_float16 = false, 236 .support_float16 = false,
236 .support_int64 = device.HasShaderInt64(), 237 .support_int64 = device.HasShaderInt64(),
237 .needs_demote_reorder = device.IsAmd(), 238 .needs_demote_reorder = device.IsAmd(),
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 9482e91b0..5734f51e5 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -350,6 +350,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
350 .has_broken_spirv_subgroup_mask_vector_extract_dynamic = 350 .has_broken_spirv_subgroup_mask_vector_extract_dynamic =
351 driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; 351 driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY};
352 host_info = Shader::HostTranslateInfo{ 352 host_info = Shader::HostTranslateInfo{
353 .support_float64 = device.IsFloat64Supported(),
353 .support_float16 = device.IsFloat16Supported(), 354 .support_float16 = device.IsFloat16Supported(),
354 .support_int64 = device.IsShaderInt64Supported(), 355 .support_int64 = device.IsShaderInt64Supported(),
355 .needs_demote_reorder = 356 .needs_demote_reorder =
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index d62a103a1..0c53e35a6 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -300,6 +300,11 @@ public:
300 return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY; 300 return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
301 } 301 }
302 302
303	/// Returns true if the device supports float64 natively.
304 bool IsFloat64Supported() const {
305 return features.features.shaderFloat64;
306 }
307
303 /// Returns true if the device supports float16 natively. 308 /// Returns true if the device supports float16 natively.
304 bool IsFloat16Supported() const { 309 bool IsFloat16Supported() const {
305 return features.shader_float16_int8.shaderFloat16; 310 return features.shader_float16_int8.shaderFloat16;