summaryrefslogtreecommitdiff
path: root/src/shader_recompiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r--src/shader_recompiler/CMakeLists.txt3
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.cpp69
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.h7
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.cpp12
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.h32
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp48
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_image.cpp146
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp18
-rw-r--r--src/shader_recompiler/environment.h2
-rw-r--r--src/shader_recompiler/file_environment.cpp4
-rw-r--r--src/shader_recompiler/file_environment.h4
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp133
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h21
-rw-r--r--src/shader_recompiler/frontend/ir/microinstruction.cpp73
-rw-r--r--src/shader_recompiler/frontend/ir/microinstruction.h22
-rw-r--r--src/shader_recompiler/frontend/ir/modifiers.h10
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.cpp2
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.inc569
-rw-r--r--src/shader_recompiler/frontend/ir/reg.h11
-rw-r--r--src/shader_recompiler/frontend/ir/value.h1
-rw-r--r--src/shader_recompiler/frontend/maxwell/maxwell.inc4
-rw-r--r--src/shader_recompiler/frontend/maxwell/program.cpp1
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp8
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp232
-rw-r--r--src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp19
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp15
-rw-r--r--src/shader_recompiler/ir_opt/passes.h2
-rw-r--r--src/shader_recompiler/ir_opt/texture_pass.cpp199
-rw-r--r--src/shader_recompiler/shader_info.h52
29 files changed, 1378 insertions, 341 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index cc38b28ed..fa268d38f 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -9,6 +9,7 @@ add_library(shader_recompiler STATIC
9 backend/spirv/emit_spirv_control_flow.cpp 9 backend/spirv/emit_spirv_control_flow.cpp
10 backend/spirv/emit_spirv_convert.cpp 10 backend/spirv/emit_spirv_convert.cpp
11 backend/spirv/emit_spirv_floating_point.cpp 11 backend/spirv/emit_spirv_floating_point.cpp
12 backend/spirv/emit_spirv_image.cpp
12 backend/spirv/emit_spirv_integer.cpp 13 backend/spirv/emit_spirv_integer.cpp
13 backend/spirv/emit_spirv_logical.cpp 14 backend/spirv/emit_spirv_logical.cpp
14 backend/spirv/emit_spirv_memory.cpp 15 backend/spirv/emit_spirv_memory.cpp
@@ -100,6 +101,7 @@ add_library(shader_recompiler STATIC
100 frontend/maxwell/translate/impl/predicate_set_predicate.cpp 101 frontend/maxwell/translate/impl/predicate_set_predicate.cpp
101 frontend/maxwell/translate/impl/predicate_set_register.cpp 102 frontend/maxwell/translate/impl/predicate_set_register.cpp
102 frontend/maxwell/translate/impl/select_source_with_predicate.cpp 103 frontend/maxwell/translate/impl/select_source_with_predicate.cpp
104 frontend/maxwell/translate/impl/texture_sample.cpp
103 frontend/maxwell/translate/translate.cpp 105 frontend/maxwell/translate/translate.cpp
104 frontend/maxwell/translate/translate.h 106 frontend/maxwell/translate/translate.h
105 ir_opt/collect_shader_info_pass.cpp 107 ir_opt/collect_shader_info_pass.cpp
@@ -110,6 +112,7 @@ add_library(shader_recompiler STATIC
110 ir_opt/lower_fp16_to_fp32.cpp 112 ir_opt/lower_fp16_to_fp32.cpp
111 ir_opt/passes.h 113 ir_opt/passes.h
112 ir_opt/ssa_rewrite_pass.cpp 114 ir_opt/ssa_rewrite_pass.cpp
115 ir_opt/texture_pass.cpp
113 ir_opt/verification_pass.cpp 116 ir_opt/verification_pass.cpp
114 object_pool.h 117 object_pool.h
115 profile.h 118 profile.h
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index d2dbd56d4..21900d387 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -12,6 +12,43 @@
12#include "shader_recompiler/backend/spirv/emit_context.h" 12#include "shader_recompiler/backend/spirv/emit_context.h"
13 13
14namespace Shader::Backend::SPIRV { 14namespace Shader::Backend::SPIRV {
15namespace {
16Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
17 const spv::ImageFormat format{spv::ImageFormat::Unknown};
18 const Id type{ctx.F32[1]};
19 switch (desc.type) {
20 case TextureType::Color1D:
21 return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 1, format);
22 case TextureType::ColorArray1D:
23 return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 1, format);
24 case TextureType::Color2D:
25 return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 1, format);
26 case TextureType::ColorArray2D:
27 return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 1, format);
28 case TextureType::Color3D:
29 return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 1, format);
30 case TextureType::ColorCube:
31 return ctx.TypeImage(type, spv::Dim::Cube, false, false, false, 1, format);
32 case TextureType::ColorArrayCube:
33 return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format);
34 case TextureType::Shadow1D:
35 return ctx.TypeImage(type, spv::Dim::Dim1D, true, false, false, 1, format);
36 case TextureType::ShadowArray1D:
37 return ctx.TypeImage(type, spv::Dim::Dim1D, true, true, false, 1, format);
38 case TextureType::Shadow2D:
39 return ctx.TypeImage(type, spv::Dim::Dim2D, true, false, false, 1, format);
40 case TextureType::ShadowArray2D:
41 return ctx.TypeImage(type, spv::Dim::Dim2D, true, true, false, 1, format);
42 case TextureType::Shadow3D:
43 return ctx.TypeImage(type, spv::Dim::Dim3D, true, false, false, 1, format);
44 case TextureType::ShadowCube:
45 return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format);
46 case TextureType::ShadowArrayCube:
47 return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format);
48 }
49 throw InvalidArgument("Invalid texture type {}", desc.type);
50}
51} // Anonymous namespace
15 52
16void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { 53void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
17 defs[0] = sirit_ctx.Name(base_type, name); 54 defs[0] = sirit_ctx.Name(base_type, name);
@@ -35,6 +72,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program)
35 u32 binding{}; 72 u32 binding{};
36 DefineConstantBuffers(program.info, binding); 73 DefineConstantBuffers(program.info, binding);
37 DefineStorageBuffers(program.info, binding); 74 DefineStorageBuffers(program.info, binding);
75 DefineTextures(program.info, binding);
38 76
39 DefineLabels(program); 77 DefineLabels(program);
40} 78}
@@ -46,6 +84,10 @@ Id EmitContext::Def(const IR::Value& value) {
46 return value.Inst()->Definition<Id>(); 84 return value.Inst()->Definition<Id>();
47 } 85 }
48 switch (value.Type()) { 86 switch (value.Type()) {
87 case IR::Type::Void:
88 // Void instructions are used for optional arguments (e.g. texture offsets)
89 // They are not meant to be used in the SPIR-V module
90 return Id{};
49 case IR::Type::U1: 91 case IR::Type::U1:
50 return value.U1() ? true_value : false_value; 92 return value.U1() ? true_value : false_value;
51 case IR::Type::U32: 93 case IR::Type::U32:
@@ -122,7 +164,7 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
122 uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); 164 uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]);
123 165
124 u32 index{}; 166 u32 index{};
125 for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { 167 for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
126 const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; 168 const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)};
127 Decorate(id, spv::Decoration::Binding, binding); 169 Decorate(id, spv::Decoration::Binding, binding);
128 Decorate(id, spv::Decoration::DescriptorSet, 0U); 170 Decorate(id, spv::Decoration::DescriptorSet, 0U);
@@ -152,7 +194,7 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
152 storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); 194 storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
153 195
154 u32 index{}; 196 u32 index{};
155 for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { 197 for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
156 const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; 198 const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)};
157 Decorate(id, spv::Decoration::Binding, binding); 199 Decorate(id, spv::Decoration::Binding, binding);
158 Decorate(id, spv::Decoration::DescriptorSet, 0U); 200 Decorate(id, spv::Decoration::DescriptorSet, 0U);
@@ -163,6 +205,29 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
163 } 205 }
164} 206}
165 207
208void EmitContext::DefineTextures(const Info& info, u32& binding) {
209 textures.reserve(info.texture_descriptors.size());
210 for (const TextureDescriptor& desc : info.texture_descriptors) {
211 if (desc.count != 1) {
212 throw NotImplementedException("Array of textures");
213 }
214 const Id type{TypeSampledImage(ImageType(*this, desc))};
215 const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, type)};
216 const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
217 Decorate(id, spv::Decoration::Binding, binding);
218 Decorate(id, spv::Decoration::DescriptorSet, 0U);
219 Name(id, fmt::format("tex{}_{:02x}", desc.cbuf_index, desc.cbuf_offset));
220 for (u32 index = 0; index < desc.count; ++index) {
221 // TODO: Pass count info
222 textures.push_back(TextureDefinition{
223 .id{id},
224 .type{type},
225 });
226 }
227 binding += desc.count;
228 }
229}
230
166void EmitContext::DefineLabels(IR::Program& program) { 231void EmitContext::DefineLabels(IR::Program& program) {
167 for (const IR::Function& function : program.functions) { 232 for (const IR::Function& function : program.functions) {
168 for (IR::Block* const block : function.blocks) { 233 for (IR::Block* const block : function.blocks) {
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index d20cf387e..8b3109eb8 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -29,6 +29,11 @@ private:
29 std::array<Id, 4> defs{}; 29 std::array<Id, 4> defs{};
30}; 30};
31 31
32struct TextureDefinition {
33 Id id;
34 Id type;
35};
36
32class EmitContext final : public Sirit::Module { 37class EmitContext final : public Sirit::Module {
33public: 38public:
34 explicit EmitContext(const Profile& profile, IR::Program& program); 39 explicit EmitContext(const Profile& profile, IR::Program& program);
@@ -56,6 +61,7 @@ public:
56 61
57 std::array<Id, Info::MAX_CBUFS> cbufs{}; 62 std::array<Id, Info::MAX_CBUFS> cbufs{};
58 std::array<Id, Info::MAX_SSBOS> ssbos{}; 63 std::array<Id, Info::MAX_SSBOS> ssbos{};
64 std::vector<TextureDefinition> textures;
59 65
60 Id workgroup_id{}; 66 Id workgroup_id{};
61 Id local_invocation_id{}; 67 Id local_invocation_id{};
@@ -66,6 +72,7 @@ private:
66 void DefineSpecialVariables(const Info& info); 72 void DefineSpecialVariables(const Info& info);
67 void DefineConstantBuffers(const Info& info, u32& binding); 73 void DefineConstantBuffers(const Info& info, u32& binding);
68 void DefineStorageBuffers(const Info& info, u32& binding); 74 void DefineStorageBuffers(const Info& info, u32& binding);
75 void DefineTextures(const Info& info, u32& binding);
69 void DefineLabels(IR::Program& program); 76 void DefineLabels(IR::Program& program);
70}; 77};
71 78
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 8097fe82d..a94e9cb2d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -221,6 +221,14 @@ std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program
221 workgroup_size[2]); 221 workgroup_size[2]);
222 222
223 SetupDenormControl(profile, program, ctx, func); 223 SetupDenormControl(profile, program, ctx, func);
224 if (info.uses_sampled_1d) {
225 ctx.AddCapability(spv::Capability::Sampled1D);
226 }
227 if (info.uses_sparse_residency) {
228 ctx.AddCapability(spv::Capability::SparseResidency);
229 }
230 // TODO: Track this usage
231 ctx.AddCapability(spv::Capability::ImageGatherExtended);
224 232
225 return ctx.Assemble(); 233 return ctx.Assemble();
226} 234}
@@ -259,4 +267,8 @@ void EmitGetOverflowFromOp(EmitContext&) {
259 throw LogicError("Unreachable instruction"); 267 throw LogicError("Unreachable instruction");
260} 268}
261 269
270void EmitGetSparseFromOp(EmitContext&) {
271 throw LogicError("Unreachable instruction");
272}
273
262} // namespace Shader::Backend::SPIRV 274} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 92387ca28..69698c478 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -83,7 +83,8 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Va
83 Id value); 83 Id value);
84void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, 84void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
85 Id value); 85 Id value);
86void EmitWriteStorage128(EmitContext& ctx); 86void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
87 Id value);
87Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); 88Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
88Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); 89Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
89Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); 90Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
@@ -145,6 +146,7 @@ void EmitGetZeroFromOp(EmitContext& ctx);
145void EmitGetSignFromOp(EmitContext& ctx); 146void EmitGetSignFromOp(EmitContext& ctx);
146void EmitGetCarryFromOp(EmitContext& ctx); 147void EmitGetCarryFromOp(EmitContext& ctx);
147void EmitGetOverflowFromOp(EmitContext& ctx); 148void EmitGetOverflowFromOp(EmitContext& ctx);
149void EmitGetSparseFromOp(EmitContext& ctx);
148Id EmitFPAbs16(EmitContext& ctx, Id value); 150Id EmitFPAbs16(EmitContext& ctx, Id value);
149Id EmitFPAbs32(EmitContext& ctx, Id value); 151Id EmitFPAbs32(EmitContext& ctx, Id value);
150Id EmitFPAbs64(EmitContext& ctx, Id value); 152Id EmitFPAbs64(EmitContext& ctx, Id value);
@@ -291,5 +293,33 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value);
291Id EmitConvertF32F16(EmitContext& ctx, Id value); 293Id EmitConvertF32F16(EmitContext& ctx, Id value);
292Id EmitConvertF32F64(EmitContext& ctx, Id value); 294Id EmitConvertF32F64(EmitContext& ctx, Id value);
293Id EmitConvertF64F32(EmitContext& ctx, Id value); 295Id EmitConvertF64F32(EmitContext& ctx, Id value);
296Id EmitConvertF16S32(EmitContext& ctx, Id value);
297Id EmitConvertF16S64(EmitContext& ctx, Id value);
298Id EmitConvertF16U32(EmitContext& ctx, Id value);
299Id EmitConvertF16U64(EmitContext& ctx, Id value);
300Id EmitConvertF32S32(EmitContext& ctx, Id value);
301Id EmitConvertF32S64(EmitContext& ctx, Id value);
302Id EmitConvertF32U32(EmitContext& ctx, Id value);
303Id EmitConvertF32U64(EmitContext& ctx, Id value);
304Id EmitConvertF64S32(EmitContext& ctx, Id value);
305Id EmitConvertF64S64(EmitContext& ctx, Id value);
306Id EmitConvertF64U32(EmitContext& ctx, Id value);
307Id EmitConvertF64U64(EmitContext& ctx, Id value);
308Id EmitBindlessImageSampleImplicitLod(EmitContext&);
309Id EmitBindlessImageSampleExplicitLod(EmitContext&);
310Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
311Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
312Id EmitBoundImageSampleImplicitLod(EmitContext&);
313Id EmitBoundImageSampleExplicitLod(EmitContext&);
314Id EmitBoundImageSampleDrefImplicitLod(EmitContext&);
315Id EmitBoundImageSampleDrefExplicitLod(EmitContext&);
316Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
317 Id bias_lc, Id offset);
318Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
319 Id lod_lc, Id offset);
320Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
321 Id coords, Id dref, Id bias_lc, Id offset);
322Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
323 Id coords, Id dref, Id lod_lc, Id offset);
294 324
295} // namespace Shader::Backend::SPIRV 325} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
index edcc2a1cc..2aff673aa 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
@@ -102,4 +102,52 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) {
102 return ctx.OpFConvert(ctx.F64[1], value); 102 return ctx.OpFConvert(ctx.F64[1], value);
103} 103}
104 104
105Id EmitConvertF16S32(EmitContext& ctx, Id value) {
106 return ctx.OpConvertSToF(ctx.F16[1], value);
107}
108
109Id EmitConvertF16S64(EmitContext& ctx, Id value) {
110 return ctx.OpConvertSToF(ctx.F16[1], value);
111}
112
113Id EmitConvertF16U32(EmitContext& ctx, Id value) {
114 return ctx.OpConvertUToF(ctx.F16[1], value);
115}
116
117Id EmitConvertF16U64(EmitContext& ctx, Id value) {
118 return ctx.OpConvertUToF(ctx.F16[1], value);
119}
120
121Id EmitConvertF32S32(EmitContext& ctx, Id value) {
122 return ctx.OpConvertSToF(ctx.F32[1], value);
123}
124
125Id EmitConvertF32S64(EmitContext& ctx, Id value) {
126 return ctx.OpConvertSToF(ctx.F32[1], value);
127}
128
129Id EmitConvertF32U32(EmitContext& ctx, Id value) {
130 return ctx.OpConvertUToF(ctx.F32[1], value);
131}
132
133Id EmitConvertF32U64(EmitContext& ctx, Id value) {
134 return ctx.OpConvertUToF(ctx.F32[1], value);
135}
136
137Id EmitConvertF64S32(EmitContext& ctx, Id value) {
138 return ctx.OpConvertSToF(ctx.F64[1], value);
139}
140
141Id EmitConvertF64S64(EmitContext& ctx, Id value) {
142 return ctx.OpConvertSToF(ctx.F64[1], value);
143}
144
145Id EmitConvertF64U32(EmitContext& ctx, Id value) {
146 return ctx.OpConvertUToF(ctx.F64[1], value);
147}
148
149Id EmitConvertF64U64(EmitContext& ctx, Id value) {
150 return ctx.OpConvertUToF(ctx.F64[1], value);
151}
152
105} // namespace Shader::Backend::SPIRV 153} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
new file mode 100644
index 000000000..5f4783c95
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -0,0 +1,146 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <boost/container/static_vector.hpp>
6
7#include "shader_recompiler/backend/spirv/emit_spirv.h"
8#include "shader_recompiler/frontend/ir/modifiers.h"
9
10namespace Shader::Backend::SPIRV {
11namespace {
12class ImageOperands {
13public:
14 explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp,
15 Id lod, Id offset) {
16 if (has_bias) {
17 const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
18 Add(spv::ImageOperandsMask::Bias, bias);
19 }
20 if (has_lod) {
21 const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
22 Add(spv::ImageOperandsMask::Lod, lod_value);
23 }
24 if (Sirit::ValidId(offset)) {
25 Add(spv::ImageOperandsMask::Offset, offset);
26 }
27 if (has_lod_clamp) {
28 const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod};
29 Add(spv::ImageOperandsMask::MinLod, lod_clamp);
30 }
31 }
32
33 void Add(spv::ImageOperandsMask new_mask, Id value) {
34 mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
35 static_cast<unsigned>(new_mask));
36 operands.push_back(value);
37 }
38
39 std::span<const Id> Span() const noexcept {
40 return std::span{operands.data(), operands.size()};
41 }
42
43 spv::ImageOperandsMask Mask() const noexcept {
44 return mask;
45 }
46
47private:
48 boost::container::static_vector<Id, 3> operands;
49 spv::ImageOperandsMask mask{};
50};
51
52Id Texture(EmitContext& ctx, const IR::Value& index) {
53 if (index.IsImmediate()) {
54 const TextureDefinition def{ctx.textures.at(index.U32())};
55 return ctx.OpLoad(def.type, def.id);
56 }
57 throw NotImplementedException("Indirect texture sample");
58}
59
60template <typename MethodPtrType, typename... Args>
61Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst,
62 Id result_type, Args&&... args) {
63 IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
64 if (!sparse) {
65 return (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...);
66 }
67 const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)};
68 const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)};
69 const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};
70 sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code));
71 sparse->Invalidate();
72 return ctx.OpCompositeExtract(result_type, sample, 1U);
73}
74} // Anonymous namespace
75
76Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
77 throw LogicError("Unreachable instruction");
78}
79
80Id EmitBindlessImageSampleExplicitLod(EmitContext&) {
81 throw LogicError("Unreachable instruction");
82}
83
84Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
85 throw LogicError("Unreachable instruction");
86}
87
88Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
89 throw LogicError("Unreachable instruction");
90}
91
92Id EmitBoundImageSampleImplicitLod(EmitContext&) {
93 throw LogicError("Unreachable instruction");
94}
95
96Id EmitBoundImageSampleExplicitLod(EmitContext&) {
97 throw LogicError("Unreachable instruction");
98}
99
100Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
101 throw LogicError("Unreachable instruction");
102}
103
104Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
105 throw LogicError("Unreachable instruction");
106}
107
108Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
109 Id bias_lc, Id offset) {
110 const auto info{inst->Flags<IR::TextureInstInfo>()};
111 const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc,
112 offset);
113 return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
114 &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index),
115 coords, operands.Mask(), operands.Span());
116}
117
118Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
119 Id lod_lc, Id offset) {
120 const auto info{inst->Flags<IR::TextureInstInfo>()};
121 const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset);
122 return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
123 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index),
124 coords, operands.Mask(), operands.Span());
125}
126
127Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
128 Id coords, Id dref, Id bias_lc, Id offset) {
129 const auto info{inst->Flags<IR::TextureInstInfo>()};
130 const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc,
131 offset);
132 return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod,
133 &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1],
134 Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
135}
136
137Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
138 Id coords, Id dref, Id lod_lc, Id offset) {
139 const auto info{inst->Flags<IR::TextureInstInfo>()};
140 const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset);
141 return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
142 &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
143 Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
144}
145
146} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
index 808c1b401..7d3efc741 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -154,8 +154,22 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va
154 ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); 154 ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
155} 155}
156 156
157void EmitWriteStorage128(EmitContext&) { 157void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
158 throw NotImplementedException("SPIR-V Instruction"); 158 Id value) {
159 if (!binding.IsImmediate()) {
160 throw NotImplementedException("Dynamic storage buffer indexing");
161 }
162 // TODO: Support reinterpreting bindings, guaranteed to be aligned
163 const Id ssbo{ctx.ssbos[binding.U32()]};
164 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
165 for (u32 element = 0; element < 4; ++element) {
166 Id index = base_index;
167 if (element > 0) {
168 index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element));
169 }
170 const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)};
171 ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, element));
172 }
159} 173}
160 174
161} // namespace Shader::Backend::SPIRV 175} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 0ba681fb9..0fcb68050 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -12,6 +12,8 @@ public:
12 12
13 [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; 13 [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0;
14 14
15 [[nodiscard]] virtual u32 TextureBoundBuffer() = 0;
16
15 [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0; 17 [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0;
16}; 18};
17 19
diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp
index 5127523f9..21700c72b 100644
--- a/src/shader_recompiler/file_environment.cpp
+++ b/src/shader_recompiler/file_environment.cpp
@@ -39,6 +39,10 @@ u64 FileEnvironment::ReadInstruction(u32 offset) {
39 return data[offset / 8]; 39 return data[offset / 8];
40} 40}
41 41
42u32 FileEnvironment::TextureBoundBuffer() {
43 throw NotImplementedException("Texture bound buffer serialization");
44}
45
42std::array<u32, 3> FileEnvironment::WorkgroupSize() { 46std::array<u32, 3> FileEnvironment::WorkgroupSize() {
43 return {1, 1, 1}; 47 return {1, 1, 1};
44} 48}
diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h
index b8c4bbadd..62302bc8e 100644
--- a/src/shader_recompiler/file_environment.h
+++ b/src/shader_recompiler/file_environment.h
@@ -3,7 +3,7 @@
3#include <vector> 3#include <vector>
4 4
5#include "common/common_types.h" 5#include "common/common_types.h"
6#include "environment.h" 6#include "shader_recompiler/environment.h"
7 7
8namespace Shader { 8namespace Shader {
9 9
@@ -14,6 +14,8 @@ public:
14 14
15 u64 ReadInstruction(u32 offset) override; 15 u64 ReadInstruction(u32 offset) override;
16 16
17 u32 TextureBoundBuffer() override;
18
17 std::array<u32, 3> WorkgroupSize() override; 19 std::array<u32, 3> WorkgroupSize() override;
18 20
19private: 21private:
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index f38b46bac..ae3354c66 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -7,11 +7,24 @@
7#include "shader_recompiler/frontend/ir/value.h" 7#include "shader_recompiler/frontend/ir/value.h"
8 8
9namespace Shader::IR { 9namespace Shader::IR {
10 10namespace {
11[[noreturn]] static void ThrowInvalidType(Type type) { 11[[noreturn]] void ThrowInvalidType(Type type) {
12 throw InvalidArgument("Invalid type {}", type); 12 throw InvalidArgument("Invalid type {}", type);
13} 13}
14 14
15Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) {
16 if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) {
17 return ir.CompositeConstruct(bias_lod, lod_clamp);
18 } else if (!bias_lod.IsEmpty()) {
19 return bias_lod;
20 } else if (!lod_clamp.IsEmpty()) {
21 return lod_clamp;
22 } else {
23 return Value{};
24 }
25}
26} // Anonymous namespace
27
15U1 IREmitter::Imm1(bool value) const { 28U1 IREmitter::Imm1(bool value) const {
16 return U1{Value{value}}; 29 return U1{Value{value}};
17} 30}
@@ -261,6 +274,10 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) {
261 return Inst<U1>(Opcode::GetOverflowFromOp, op); 274 return Inst<U1>(Opcode::GetOverflowFromOp, op);
262} 275}
263 276
277U1 IREmitter::GetSparseFromOp(const Value& op) {
278 return Inst<U1>(Opcode::GetSparseFromOp, op);
279}
280
264F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { 281F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
265 if (a.Type() != a.Type()) { 282 if (a.Type() != a.Type()) {
266 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); 283 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
@@ -1035,6 +1052,82 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
1035 } 1052 }
1036} 1053}
1037 1054
1055F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) {
1056 switch (bitsize) {
1057 case 16:
1058 switch (value.Type()) {
1059 case Type::U32:
1060 return Inst<F16>(Opcode::ConvertF16S32, value);
1061 case Type::U64:
1062 return Inst<F16>(Opcode::ConvertF16S64, value);
1063 default:
1064 ThrowInvalidType(value.Type());
1065 }
1066 case 32:
1067 switch (value.Type()) {
1068 case Type::U32:
1069 return Inst<F32>(Opcode::ConvertF32S32, value);
1070 case Type::U64:
1071 return Inst<F32>(Opcode::ConvertF32S64, value);
1072 default:
1073 ThrowInvalidType(value.Type());
1074 }
1075 case 64:
1076 switch (value.Type()) {
1077 case Type::U32:
1078 return Inst<F16>(Opcode::ConvertF64S32, value);
1079 case Type::U64:
1080 return Inst<F16>(Opcode::ConvertF64S64, value);
1081 default:
1082 ThrowInvalidType(value.Type());
1083 }
1084 default:
1085 throw InvalidArgument("Invalid destination bitsize {}", bitsize);
1086 }
1087}
1088
1089F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) {
1090 switch (bitsize) {
1091 case 16:
1092 switch (value.Type()) {
1093 case Type::U32:
1094 return Inst<F16>(Opcode::ConvertF16U32, value);
1095 case Type::U64:
1096 return Inst<F16>(Opcode::ConvertF16U64, value);
1097 default:
1098 ThrowInvalidType(value.Type());
1099 }
1100 case 32:
1101 switch (value.Type()) {
1102 case Type::U32:
1103 return Inst<F32>(Opcode::ConvertF32U32, value);
1104 case Type::U64:
1105 return Inst<F32>(Opcode::ConvertF32U64, value);
1106 default:
1107 ThrowInvalidType(value.Type());
1108 }
1109 case 64:
1110 switch (value.Type()) {
1111 case Type::U32:
1112 return Inst<F16>(Opcode::ConvertF64U32, value);
1113 case Type::U64:
1114 return Inst<F16>(Opcode::ConvertF64U64, value);
1115 default:
1116 ThrowInvalidType(value.Type());
1117 }
1118 default:
1119 throw InvalidArgument("Invalid destination bitsize {}", bitsize);
1120 }
1121}
1122
1123F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) {
1124 if (is_signed) {
1125 return ConvertSToF(bitsize, value);
1126 } else {
1127 return ConvertUToF(bitsize, value);
1128 }
1129}
1130
1038U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { 1131U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
1039 switch (result_bitsize) { 1132 switch (result_bitsize) {
1040 case 32: 1133 case 32:
@@ -1107,4 +1200,40 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
1107 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); 1200 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
1108} 1201}
1109 1202
1203Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
1204 const Value& offset, const F32& lod_clamp,
1205 TextureInstInfo info) {
1206 const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
1207 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod
1208 : Opcode::BindlessImageSampleImplicitLod};
1209 return Inst(op, Flags{info}, handle, coords, bias_lc, offset);
1210}
1211
1212Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
1213 const Value& offset, const F32& lod_clamp,
1214 TextureInstInfo info) {
1215 const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)};
1216 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod
1217 : Opcode::BindlessImageSampleExplicitLod};
1218 return Inst(op, Flags{info}, handle, coords, lod_lc, offset);
1219}
1220
1221F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref,
1222 const F32& bias, const Value& offset,
1223 const F32& lod_clamp, TextureInstInfo info) {
1224 const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
1225 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod
1226 : Opcode::BindlessImageSampleDrefImplicitLod};
1227 return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset);
1228}
1229
1230F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref,
1231 const F32& lod, const Value& offset, const F32& lod_clamp,
1232 TextureInstInfo info) {
1233 const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)};
1234 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod
1235 : Opcode::BindlessImageSampleDrefExplicitLod};
1236 return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset);
1237}
1238
1110} // namespace Shader::IR 1239} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 6e29bf0e2..cb2a7710a 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -91,6 +91,7 @@ public:
91 [[nodiscard]] U1 GetSignFromOp(const Value& op); 91 [[nodiscard]] U1 GetSignFromOp(const Value& op);
92 [[nodiscard]] U1 GetCarryFromOp(const Value& op); 92 [[nodiscard]] U1 GetCarryFromOp(const Value& op);
93 [[nodiscard]] U1 GetOverflowFromOp(const Value& op); 93 [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
94 [[nodiscard]] U1 GetSparseFromOp(const Value& op);
94 95
95 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); 96 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
96 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); 97 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
@@ -159,7 +160,7 @@ public:
159 [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, 160 [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
160 const U32& count); 161 const U32& count);
161 [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, 162 [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
162 bool is_signed); 163 bool is_signed = false);
163 [[nodiscard]] U32 BitReverse(const U32& value); 164 [[nodiscard]] U32 BitReverse(const U32& value);
164 [[nodiscard]] U32 BitCount(const U32& value); 165 [[nodiscard]] U32 BitCount(const U32& value);
165 [[nodiscard]] U32 BitwiseNot(const U32& value); 166 [[nodiscard]] U32 BitwiseNot(const U32& value);
@@ -186,10 +187,28 @@ public:
186 [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); 187 [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
187 [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); 188 [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
188 [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); 189 [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
190 [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value);
191 [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value);
192 [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value);
189 193
190 [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); 194 [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
191 [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); 195 [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
192 196
197 [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
198 const F32& bias, const Value& offset,
199 const F32& lod_clamp, TextureInstInfo info);
200 [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
201 const F32& lod, const Value& offset,
202 const F32& lod_clamp, TextureInstInfo info);
203 [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
204 const F32& dref, const F32& bias,
205 const Value& offset, const F32& lod_clamp,
206 TextureInstInfo info);
207 [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
208 const F32& dref, const F32& lod,
209 const Value& offset, const F32& lod_clamp,
210 TextureInstInfo info);
211
193private: 212private:
194 IR::Block::iterator insertion_point; 213 IR::Block::iterator insertion_point;
195 214
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index d6a9be87d..88e186f21 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -10,26 +10,27 @@
10#include "shader_recompiler/frontend/ir/type.h" 10#include "shader_recompiler/frontend/ir/type.h"
11 11
12namespace Shader::IR { 12namespace Shader::IR {
13 13namespace {
14static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { 14void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
15 if (inst && inst->Opcode() != opcode) { 15 if (inst && inst->Opcode() != opcode) {
16 throw LogicError("Invalid pseudo-instruction"); 16 throw LogicError("Invalid pseudo-instruction");
17 } 17 }
18} 18}
19 19
20static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { 20void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
21 if (dest_inst) { 21 if (dest_inst) {
22 throw LogicError("Only one of each type of pseudo-op allowed"); 22 throw LogicError("Only one of each type of pseudo-op allowed");
23 } 23 }
24 dest_inst = pseudo_inst; 24 dest_inst = pseudo_inst;
25} 25}
26 26
27static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { 27void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
28 if (inst->Opcode() != expected_opcode) { 28 if (inst->Opcode() != expected_opcode) {
29 throw LogicError("Undoing use of invalid pseudo-op"); 29 throw LogicError("Undoing use of invalid pseudo-op");
30 } 30 }
31 inst = nullptr; 31 inst = nullptr;
32} 32}
33} // Anonymous namespace
33 34
34Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { 35Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
35 if (op == Opcode::Phi) { 36 if (op == Opcode::Phi) {
@@ -82,6 +83,7 @@ bool Inst::IsPseudoInstruction() const noexcept {
82 case Opcode::GetSignFromOp: 83 case Opcode::GetSignFromOp:
83 case Opcode::GetCarryFromOp: 84 case Opcode::GetCarryFromOp:
84 case Opcode::GetOverflowFromOp: 85 case Opcode::GetOverflowFromOp:
86 case Opcode::GetSparseFromOp:
85 return true; 87 return true;
86 default: 88 default:
87 return false; 89 return false;
@@ -96,25 +98,26 @@ bool Inst::AreAllArgsImmediates() const {
96 [](const IR::Value& value) { return value.IsImmediate(); }); 98 [](const IR::Value& value) { return value.IsImmediate(); });
97} 99}
98 100
99bool Inst::HasAssociatedPseudoOperation() const noexcept {
100 return zero_inst || sign_inst || carry_inst || overflow_inst;
101}
102
103Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { 101Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
104 // This is faster than doing a search through the block. 102 if (!associated_insts) {
103 return nullptr;
104 }
105 switch (opcode) { 105 switch (opcode) {
106 case Opcode::GetZeroFromOp: 106 case Opcode::GetZeroFromOp:
107 CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp); 107 CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp);
108 return zero_inst; 108 return associated_insts->zero_inst;
109 case Opcode::GetSignFromOp: 109 case Opcode::GetSignFromOp:
110 CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp); 110 CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp);
111 return sign_inst; 111 return associated_insts->sign_inst;
112 case Opcode::GetCarryFromOp: 112 case Opcode::GetCarryFromOp:
113 CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp); 113 CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp);
114 return carry_inst; 114 return associated_insts->carry_inst;
115 case Opcode::GetOverflowFromOp: 115 case Opcode::GetOverflowFromOp:
116 CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); 116 CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
117 return overflow_inst; 117 return associated_insts->overflow_inst;
118 case Opcode::GetSparseFromOp:
119 CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
120 return associated_insts->sparse_inst;
118 default: 121 default:
119 throw InvalidArgument("{} is not a pseudo-instruction", opcode); 122 throw InvalidArgument("{} is not a pseudo-instruction", opcode);
120 } 123 }
@@ -220,22 +223,37 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) {
220 op = opcode; 223 op = opcode;
221} 224}
222 225
226void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
227 if (!associated_insts) {
228 associated_insts = std::make_unique<AssociatedInsts>();
229 }
230}
231
223void Inst::Use(const Value& value) { 232void Inst::Use(const Value& value) {
224 Inst* const inst{value.Inst()}; 233 Inst* const inst{value.Inst()};
225 ++inst->use_count; 234 ++inst->use_count;
226 235
236 std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
227 switch (op) { 237 switch (op) {
228 case Opcode::GetZeroFromOp: 238 case Opcode::GetZeroFromOp:
229 SetPseudoInstruction(inst->zero_inst, this); 239 AllocAssociatedInsts(assoc_inst);
240 SetPseudoInstruction(assoc_inst->zero_inst, this);
230 break; 241 break;
231 case Opcode::GetSignFromOp: 242 case Opcode::GetSignFromOp:
232 SetPseudoInstruction(inst->sign_inst, this); 243 AllocAssociatedInsts(assoc_inst);
244 SetPseudoInstruction(assoc_inst->sign_inst, this);
233 break; 245 break;
234 case Opcode::GetCarryFromOp: 246 case Opcode::GetCarryFromOp:
235 SetPseudoInstruction(inst->carry_inst, this); 247 AllocAssociatedInsts(assoc_inst);
248 SetPseudoInstruction(assoc_inst->carry_inst, this);
236 break; 249 break;
237 case Opcode::GetOverflowFromOp: 250 case Opcode::GetOverflowFromOp:
238 SetPseudoInstruction(inst->overflow_inst, this); 251 AllocAssociatedInsts(assoc_inst);
252 SetPseudoInstruction(assoc_inst->overflow_inst, this);
253 break;
254 case Opcode::GetSparseFromOp:
255 AllocAssociatedInsts(assoc_inst);
256 SetPseudoInstruction(assoc_inst->sparse_inst, this);
239 break; 257 break;
240 default: 258 default:
241 break; 259 break;
@@ -246,18 +264,23 @@ void Inst::UndoUse(const Value& value) {
246 Inst* const inst{value.Inst()}; 264 Inst* const inst{value.Inst()};
247 --inst->use_count; 265 --inst->use_count;
248 266
267 std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
249 switch (op) { 268 switch (op) {
250 case Opcode::GetZeroFromOp: 269 case Opcode::GetZeroFromOp:
251 RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp); 270 AllocAssociatedInsts(assoc_inst);
271 RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp);
252 break; 272 break;
253 case Opcode::GetSignFromOp: 273 case Opcode::GetSignFromOp:
254 RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp); 274 AllocAssociatedInsts(assoc_inst);
275 RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp);
255 break; 276 break;
256 case Opcode::GetCarryFromOp: 277 case Opcode::GetCarryFromOp:
257 RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp); 278 AllocAssociatedInsts(assoc_inst);
279 RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp);
258 break; 280 break;
259 case Opcode::GetOverflowFromOp: 281 case Opcode::GetOverflowFromOp:
260 RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp); 282 AllocAssociatedInsts(assoc_inst);
283 RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
261 break; 284 break;
262 default: 285 default:
263 break; 286 break;
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 321393dd7..d5336c438 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -22,7 +22,7 @@ namespace Shader::IR {
22 22
23class Block; 23class Block;
24 24
25constexpr size_t MAX_ARG_COUNT = 4; 25struct AssociatedInsts;
26 26
27class Inst : public boost::intrusive::list_base_hook<> { 27class Inst : public boost::intrusive::list_base_hook<> {
28public: 28public:
@@ -50,6 +50,11 @@ public:
50 return op; 50 return op;
51 } 51 }
52 52
53 /// Determines if there is a pseudo-operation associated with this instruction.
54 [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
55 return associated_insts != nullptr;
56 }
57
53 /// Determines whether or not this instruction may have side effects. 58 /// Determines whether or not this instruction may have side effects.
54 [[nodiscard]] bool MayHaveSideEffects() const noexcept; 59 [[nodiscard]] bool MayHaveSideEffects() const noexcept;
55 60
@@ -60,8 +65,6 @@ public:
60 /// Determines if all arguments of this instruction are immediates. 65 /// Determines if all arguments of this instruction are immediates.
61 [[nodiscard]] bool AreAllArgsImmediates() const; 66 [[nodiscard]] bool AreAllArgsImmediates() const;
62 67
63 /// Determines if there is a pseudo-operation associated with this instruction.
64 [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept;
65 /// Gets a pseudo-operation associated with this instruction 68 /// Gets a pseudo-operation associated with this instruction
66 [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); 69 [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
67 70
@@ -122,14 +125,21 @@ private:
122 u32 definition{}; 125 u32 definition{};
123 union { 126 union {
124 NonTriviallyDummy dummy{}; 127 NonTriviallyDummy dummy{};
125 std::array<Value, MAX_ARG_COUNT> args;
126 std::vector<std::pair<Block*, Value>> phi_args; 128 std::vector<std::pair<Block*, Value>> phi_args;
129 std::array<Value, 5> args;
130 };
131 std::unique_ptr<AssociatedInsts> associated_insts;
132};
133static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
134
135struct AssociatedInsts {
136 union {
137 Inst* sparse_inst;
138 Inst* zero_inst{};
127 }; 139 };
128 Inst* zero_inst{};
129 Inst* sign_inst{}; 140 Inst* sign_inst{};
130 Inst* carry_inst{}; 141 Inst* carry_inst{};
131 Inst* overflow_inst{}; 142 Inst* overflow_inst{};
132}; 143};
133static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size");
134 144
135} // namespace Shader::IR 145} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 44652eae7..ad07700ae 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -4,7 +4,9 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common/bit_field.h"
7#include "common/common_types.h" 8#include "common/common_types.h"
9#include "shader_recompiler/shader_info.h"
8 10
9namespace Shader::IR { 11namespace Shader::IR {
10 12
@@ -30,4 +32,12 @@ struct FpControl {
30}; 32};
31static_assert(sizeof(FpControl) <= sizeof(u32)); 33static_assert(sizeof(FpControl) <= sizeof(u32));
32 34
35union TextureInstInfo {
36 u32 raw;
37 BitField<0, 8, TextureType> type;
38 BitField<8, 1, u32> has_bias;
39 BitField<16, 1, u32> has_lod_clamp;
40};
41static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
42
33} // namespace Shader::IR 43} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
index 1f188411a..8492a13d5 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.cpp
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -14,7 +14,7 @@ namespace {
14struct OpcodeMeta { 14struct OpcodeMeta {
15 std::string_view name; 15 std::string_view name;
16 Type type; 16 Type type;
17 std::array<Type, 4> arg_types; 17 std::array<Type, 5> arg_types;
18}; 18};
19 19
20using enum Type; 20using enum Type;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index c4e72c84d..aa011fab1 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -2,301 +2,330 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... 5// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg4 type, ...
6OPCODE(Phi, Opaque, ) 6OPCODE(Phi, Opaque, )
7OPCODE(Identity, Opaque, Opaque, ) 7OPCODE(Identity, Opaque, Opaque, )
8OPCODE(Void, Void, ) 8OPCODE(Void, Void, )
9 9
10// Control flow 10// Control flow
11OPCODE(Branch, Void, Label, ) 11OPCODE(Branch, Void, Label, )
12OPCODE(BranchConditional, Void, U1, Label, Label, ) 12OPCODE(BranchConditional, Void, U1, Label, Label, )
13OPCODE(LoopMerge, Void, Label, Label, ) 13OPCODE(LoopMerge, Void, Label, Label, )
14OPCODE(SelectionMerge, Void, Label, ) 14OPCODE(SelectionMerge, Void, Label, )
15OPCODE(Return, Void, ) 15OPCODE(Return, Void, )
16 16
17// Context getters/setters 17// Context getters/setters
18OPCODE(GetRegister, U32, Reg, ) 18OPCODE(GetRegister, U32, Reg, )
19OPCODE(SetRegister, Void, Reg, U32, ) 19OPCODE(SetRegister, Void, Reg, U32, )
20OPCODE(GetPred, U1, Pred, ) 20OPCODE(GetPred, U1, Pred, )
21OPCODE(SetPred, Void, Pred, U1, ) 21OPCODE(SetPred, Void, Pred, U1, )
22OPCODE(GetGotoVariable, U1, U32, ) 22OPCODE(GetGotoVariable, U1, U32, )
23OPCODE(SetGotoVariable, Void, U32, U1, ) 23OPCODE(SetGotoVariable, Void, U32, U1, )
24OPCODE(GetCbuf, U32, U32, U32, ) 24OPCODE(GetCbuf, U32, U32, U32, )
25OPCODE(GetAttribute, U32, Attribute, ) 25OPCODE(GetAttribute, U32, Attribute, )
26OPCODE(SetAttribute, Void, Attribute, U32, ) 26OPCODE(SetAttribute, Void, Attribute, U32, )
27OPCODE(GetAttributeIndexed, U32, U32, ) 27OPCODE(GetAttributeIndexed, U32, U32, )
28OPCODE(SetAttributeIndexed, Void, U32, U32, ) 28OPCODE(SetAttributeIndexed, Void, U32, U32, )
29OPCODE(GetZFlag, U1, Void, ) 29OPCODE(GetZFlag, U1, Void, )
30OPCODE(GetSFlag, U1, Void, ) 30OPCODE(GetSFlag, U1, Void, )
31OPCODE(GetCFlag, U1, Void, ) 31OPCODE(GetCFlag, U1, Void, )
32OPCODE(GetOFlag, U1, Void, ) 32OPCODE(GetOFlag, U1, Void, )
33OPCODE(SetZFlag, Void, U1, ) 33OPCODE(SetZFlag, Void, U1, )
34OPCODE(SetSFlag, Void, U1, ) 34OPCODE(SetSFlag, Void, U1, )
35OPCODE(SetCFlag, Void, U1, ) 35OPCODE(SetCFlag, Void, U1, )
36OPCODE(SetOFlag, Void, U1, ) 36OPCODE(SetOFlag, Void, U1, )
37OPCODE(WorkgroupId, U32x3, ) 37OPCODE(WorkgroupId, U32x3, )
38OPCODE(LocalInvocationId, U32x3, ) 38OPCODE(LocalInvocationId, U32x3, )
39 39
40// Undefined 40// Undefined
41OPCODE(UndefU1, U1, ) 41OPCODE(UndefU1, U1, )
42OPCODE(UndefU8, U8, ) 42OPCODE(UndefU8, U8, )
43OPCODE(UndefU16, U16, ) 43OPCODE(UndefU16, U16, )
44OPCODE(UndefU32, U32, ) 44OPCODE(UndefU32, U32, )
45OPCODE(UndefU64, U64, ) 45OPCODE(UndefU64, U64, )
46 46
47// Memory operations 47// Memory operations
48OPCODE(LoadGlobalU8, U32, U64, ) 48OPCODE(LoadGlobalU8, U32, U64, )
49OPCODE(LoadGlobalS8, U32, U64, ) 49OPCODE(LoadGlobalS8, U32, U64, )
50OPCODE(LoadGlobalU16, U32, U64, ) 50OPCODE(LoadGlobalU16, U32, U64, )
51OPCODE(LoadGlobalS16, U32, U64, ) 51OPCODE(LoadGlobalS16, U32, U64, )
52OPCODE(LoadGlobal32, U32, U64, ) 52OPCODE(LoadGlobal32, U32, U64, )
53OPCODE(LoadGlobal64, U32x2, U64, ) 53OPCODE(LoadGlobal64, U32x2, U64, )
54OPCODE(LoadGlobal128, U32x4, U64, ) 54OPCODE(LoadGlobal128, U32x4, U64, )
55OPCODE(WriteGlobalU8, Void, U64, U32, ) 55OPCODE(WriteGlobalU8, Void, U64, U32, )
56OPCODE(WriteGlobalS8, Void, U64, U32, ) 56OPCODE(WriteGlobalS8, Void, U64, U32, )
57OPCODE(WriteGlobalU16, Void, U64, U32, ) 57OPCODE(WriteGlobalU16, Void, U64, U32, )
58OPCODE(WriteGlobalS16, Void, U64, U32, ) 58OPCODE(WriteGlobalS16, Void, U64, U32, )
59OPCODE(WriteGlobal32, Void, U64, U32, ) 59OPCODE(WriteGlobal32, Void, U64, U32, )
60OPCODE(WriteGlobal64, Void, U64, U32x2, ) 60OPCODE(WriteGlobal64, Void, U64, U32x2, )
61OPCODE(WriteGlobal128, Void, U64, U32x4, ) 61OPCODE(WriteGlobal128, Void, U64, U32x4, )
62 62
63// Storage buffer operations 63// Storage buffer operations
64OPCODE(LoadStorageU8, U32, U32, U32, ) 64OPCODE(LoadStorageU8, U32, U32, U32, )
65OPCODE(LoadStorageS8, U32, U32, U32, ) 65OPCODE(LoadStorageS8, U32, U32, U32, )
66OPCODE(LoadStorageU16, U32, U32, U32, ) 66OPCODE(LoadStorageU16, U32, U32, U32, )
67OPCODE(LoadStorageS16, U32, U32, U32, ) 67OPCODE(LoadStorageS16, U32, U32, U32, )
68OPCODE(LoadStorage32, U32, U32, U32, ) 68OPCODE(LoadStorage32, U32, U32, U32, )
69OPCODE(LoadStorage64, U32x2, U32, U32, ) 69OPCODE(LoadStorage64, U32x2, U32, U32, )
70OPCODE(LoadStorage128, U32x4, U32, U32, ) 70OPCODE(LoadStorage128, U32x4, U32, U32, )
71OPCODE(WriteStorageU8, Void, U32, U32, U32, ) 71OPCODE(WriteStorageU8, Void, U32, U32, U32, )
72OPCODE(WriteStorageS8, Void, U32, U32, U32, ) 72OPCODE(WriteStorageS8, Void, U32, U32, U32, )
73OPCODE(WriteStorageU16, Void, U32, U32, U32, ) 73OPCODE(WriteStorageU16, Void, U32, U32, U32, )
74OPCODE(WriteStorageS16, Void, U32, U32, U32, ) 74OPCODE(WriteStorageS16, Void, U32, U32, U32, )
75OPCODE(WriteStorage32, Void, U32, U32, U32, ) 75OPCODE(WriteStorage32, Void, U32, U32, U32, )
76OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) 76OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
77OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) 77OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
78 78
79// Vector utility 79// Vector utility
80OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) 80OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
81OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) 81OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
82OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) 82OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
83OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) 83OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
84OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) 84OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
85OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) 85OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
86OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) 86OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
87OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) 87OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
88OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) 88OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
89OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) 89OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
90OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) 90OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
91OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) 91OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
92OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) 92OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
93OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) 93OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
94OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) 94OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
95OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) 95OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
96OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) 96OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
97OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) 97OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
98OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) 98OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
99OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) 99OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
100OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) 100OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
101OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) 101OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
102OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) 102OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
103OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) 103OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
104OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) 104OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
105OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) 105OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
106OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) 106OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
107OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) 107OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
108OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) 108OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
109OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) 109OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
110OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) 110OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
111OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) 111OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
112OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) 112OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
113OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) 113OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
114OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) 114OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
115OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) 115OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
116 116
117// Select operations 117// Select operations
118OPCODE(SelectU1, U1, U1, U1, U1, ) 118OPCODE(SelectU1, U1, U1, U1, U1, )
119OPCODE(SelectU8, U8, U1, U8, U8, ) 119OPCODE(SelectU8, U8, U1, U8, U8, )
120OPCODE(SelectU16, U16, U1, U16, U16, ) 120OPCODE(SelectU16, U16, U1, U16, U16, )
121OPCODE(SelectU32, U32, U1, U32, U32, ) 121OPCODE(SelectU32, U32, U1, U32, U32, )
122OPCODE(SelectU64, U64, U1, U64, U64, ) 122OPCODE(SelectU64, U64, U1, U64, U64, )
123OPCODE(SelectF16, F16, U1, F16, F16, ) 123OPCODE(SelectF16, F16, U1, F16, F16, )
124OPCODE(SelectF32, F32, U1, F32, F32, ) 124OPCODE(SelectF32, F32, U1, F32, F32, )
125 125
126// Bitwise conversions 126// Bitwise conversions
127OPCODE(BitCastU16F16, U16, F16, ) 127OPCODE(BitCastU16F16, U16, F16, )
128OPCODE(BitCastU32F32, U32, F32, ) 128OPCODE(BitCastU32F32, U32, F32, )
129OPCODE(BitCastU64F64, U64, F64, ) 129OPCODE(BitCastU64F64, U64, F64, )
130OPCODE(BitCastF16U16, F16, U16, ) 130OPCODE(BitCastF16U16, F16, U16, )
131OPCODE(BitCastF32U32, F32, U32, ) 131OPCODE(BitCastF32U32, F32, U32, )
132OPCODE(BitCastF64U64, F64, U64, ) 132OPCODE(BitCastF64U64, F64, U64, )
133OPCODE(PackUint2x32, U64, U32x2, ) 133OPCODE(PackUint2x32, U64, U32x2, )
134OPCODE(UnpackUint2x32, U32x2, U64, ) 134OPCODE(UnpackUint2x32, U32x2, U64, )
135OPCODE(PackFloat2x16, U32, F16x2, ) 135OPCODE(PackFloat2x16, U32, F16x2, )
136OPCODE(UnpackFloat2x16, F16x2, U32, ) 136OPCODE(UnpackFloat2x16, F16x2, U32, )
137OPCODE(PackHalf2x16, U32, F32x2, ) 137OPCODE(PackHalf2x16, U32, F32x2, )
138OPCODE(UnpackHalf2x16, F32x2, U32, ) 138OPCODE(UnpackHalf2x16, F32x2, U32, )
139OPCODE(PackDouble2x32, F64, U32x2, ) 139OPCODE(PackDouble2x32, F64, U32x2, )
140OPCODE(UnpackDouble2x32, U32x2, F64, ) 140OPCODE(UnpackDouble2x32, U32x2, F64, )
141 141
142// Pseudo-operation, handled specially at final emit 142// Pseudo-operation, handled specially at final emit
143OPCODE(GetZeroFromOp, U1, Opaque, ) 143OPCODE(GetZeroFromOp, U1, Opaque, )
144OPCODE(GetSignFromOp, U1, Opaque, ) 144OPCODE(GetSignFromOp, U1, Opaque, )
145OPCODE(GetCarryFromOp, U1, Opaque, ) 145OPCODE(GetCarryFromOp, U1, Opaque, )
146OPCODE(GetOverflowFromOp, U1, Opaque, ) 146OPCODE(GetOverflowFromOp, U1, Opaque, )
147OPCODE(GetSparseFromOp, U1, Opaque, )
147 148
148// Floating-point operations 149// Floating-point operations
149OPCODE(FPAbs16, F16, F16, ) 150OPCODE(FPAbs16, F16, F16, )
150OPCODE(FPAbs32, F32, F32, ) 151OPCODE(FPAbs32, F32, F32, )
151OPCODE(FPAbs64, F64, F64, ) 152OPCODE(FPAbs64, F64, F64, )
152OPCODE(FPAdd16, F16, F16, F16, ) 153OPCODE(FPAdd16, F16, F16, F16, )
153OPCODE(FPAdd32, F32, F32, F32, ) 154OPCODE(FPAdd32, F32, F32, F32, )
154OPCODE(FPAdd64, F64, F64, F64, ) 155OPCODE(FPAdd64, F64, F64, F64, )
155OPCODE(FPFma16, F16, F16, F16, F16, ) 156OPCODE(FPFma16, F16, F16, F16, F16, )
156OPCODE(FPFma32, F32, F32, F32, F32, ) 157OPCODE(FPFma32, F32, F32, F32, F32, )
157OPCODE(FPFma64, F64, F64, F64, F64, ) 158OPCODE(FPFma64, F64, F64, F64, F64, )
158OPCODE(FPMax32, F32, F32, F32, ) 159OPCODE(FPMax32, F32, F32, F32, )
159OPCODE(FPMax64, F64, F64, F64, ) 160OPCODE(FPMax64, F64, F64, F64, )
160OPCODE(FPMin32, F32, F32, F32, ) 161OPCODE(FPMin32, F32, F32, F32, )
161OPCODE(FPMin64, F64, F64, F64, ) 162OPCODE(FPMin64, F64, F64, F64, )
162OPCODE(FPMul16, F16, F16, F16, ) 163OPCODE(FPMul16, F16, F16, F16, )
163OPCODE(FPMul32, F32, F32, F32, ) 164OPCODE(FPMul32, F32, F32, F32, )
164OPCODE(FPMul64, F64, F64, F64, ) 165OPCODE(FPMul64, F64, F64, F64, )
165OPCODE(FPNeg16, F16, F16, ) 166OPCODE(FPNeg16, F16, F16, )
166OPCODE(FPNeg32, F32, F32, ) 167OPCODE(FPNeg32, F32, F32, )
167OPCODE(FPNeg64, F64, F64, ) 168OPCODE(FPNeg64, F64, F64, )
168OPCODE(FPRecip32, F32, F32, ) 169OPCODE(FPRecip32, F32, F32, )
169OPCODE(FPRecip64, F64, F64, ) 170OPCODE(FPRecip64, F64, F64, )
170OPCODE(FPRecipSqrt32, F32, F32, ) 171OPCODE(FPRecipSqrt32, F32, F32, )
171OPCODE(FPRecipSqrt64, F64, F64, ) 172OPCODE(FPRecipSqrt64, F64, F64, )
172OPCODE(FPSqrt, F32, F32, ) 173OPCODE(FPSqrt, F32, F32, )
173OPCODE(FPSin, F32, F32, ) 174OPCODE(FPSin, F32, F32, )
174OPCODE(FPExp2, F32, F32, ) 175OPCODE(FPExp2, F32, F32, )
175OPCODE(FPCos, F32, F32, ) 176OPCODE(FPCos, F32, F32, )
176OPCODE(FPLog2, F32, F32, ) 177OPCODE(FPLog2, F32, F32, )
177OPCODE(FPSaturate16, F16, F16, ) 178OPCODE(FPSaturate16, F16, F16, )
178OPCODE(FPSaturate32, F32, F32, ) 179OPCODE(FPSaturate32, F32, F32, )
179OPCODE(FPSaturate64, F64, F64, ) 180OPCODE(FPSaturate64, F64, F64, )
180OPCODE(FPRoundEven16, F16, F16, ) 181OPCODE(FPRoundEven16, F16, F16, )
181OPCODE(FPRoundEven32, F32, F32, ) 182OPCODE(FPRoundEven32, F32, F32, )
182OPCODE(FPRoundEven64, F64, F64, ) 183OPCODE(FPRoundEven64, F64, F64, )
183OPCODE(FPFloor16, F16, F16, ) 184OPCODE(FPFloor16, F16, F16, )
184OPCODE(FPFloor32, F32, F32, ) 185OPCODE(FPFloor32, F32, F32, )
185OPCODE(FPFloor64, F64, F64, ) 186OPCODE(FPFloor64, F64, F64, )
186OPCODE(FPCeil16, F16, F16, ) 187OPCODE(FPCeil16, F16, F16, )
187OPCODE(FPCeil32, F32, F32, ) 188OPCODE(FPCeil32, F32, F32, )
188OPCODE(FPCeil64, F64, F64, ) 189OPCODE(FPCeil64, F64, F64, )
189OPCODE(FPTrunc16, F16, F16, ) 190OPCODE(FPTrunc16, F16, F16, )
190OPCODE(FPTrunc32, F32, F32, ) 191OPCODE(FPTrunc32, F32, F32, )
191OPCODE(FPTrunc64, F64, F64, ) 192OPCODE(FPTrunc64, F64, F64, )
192 193
193OPCODE(FPOrdEqual16, U1, F16, F16, ) 194OPCODE(FPOrdEqual16, U1, F16, F16, )
194OPCODE(FPOrdEqual32, U1, F32, F32, ) 195OPCODE(FPOrdEqual32, U1, F32, F32, )
195OPCODE(FPOrdEqual64, U1, F64, F64, ) 196OPCODE(FPOrdEqual64, U1, F64, F64, )
196OPCODE(FPUnordEqual16, U1, F16, F16, ) 197OPCODE(FPUnordEqual16, U1, F16, F16, )
197OPCODE(FPUnordEqual32, U1, F32, F32, ) 198OPCODE(FPUnordEqual32, U1, F32, F32, )
198OPCODE(FPUnordEqual64, U1, F64, F64, ) 199OPCODE(FPUnordEqual64, U1, F64, F64, )
199OPCODE(FPOrdNotEqual16, U1, F16, F16, ) 200OPCODE(FPOrdNotEqual16, U1, F16, F16, )
200OPCODE(FPOrdNotEqual32, U1, F32, F32, ) 201OPCODE(FPOrdNotEqual32, U1, F32, F32, )
201OPCODE(FPOrdNotEqual64, U1, F64, F64, ) 202OPCODE(FPOrdNotEqual64, U1, F64, F64, )
202OPCODE(FPUnordNotEqual16, U1, F16, F16, ) 203OPCODE(FPUnordNotEqual16, U1, F16, F16, )
203OPCODE(FPUnordNotEqual32, U1, F32, F32, ) 204OPCODE(FPUnordNotEqual32, U1, F32, F32, )
204OPCODE(FPUnordNotEqual64, U1, F64, F64, ) 205OPCODE(FPUnordNotEqual64, U1, F64, F64, )
205OPCODE(FPOrdLessThan16, U1, F16, F16, ) 206OPCODE(FPOrdLessThan16, U1, F16, F16, )
206OPCODE(FPOrdLessThan32, U1, F32, F32, ) 207OPCODE(FPOrdLessThan32, U1, F32, F32, )
207OPCODE(FPOrdLessThan64, U1, F64, F64, ) 208OPCODE(FPOrdLessThan64, U1, F64, F64, )
208OPCODE(FPUnordLessThan16, U1, F16, F16, ) 209OPCODE(FPUnordLessThan16, U1, F16, F16, )
209OPCODE(FPUnordLessThan32, U1, F32, F32, ) 210OPCODE(FPUnordLessThan32, U1, F32, F32, )
210OPCODE(FPUnordLessThan64, U1, F64, F64, ) 211OPCODE(FPUnordLessThan64, U1, F64, F64, )
211OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) 212OPCODE(FPOrdGreaterThan16, U1, F16, F16, )
212OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) 213OPCODE(FPOrdGreaterThan32, U1, F32, F32, )
213OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) 214OPCODE(FPOrdGreaterThan64, U1, F64, F64, )
214OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) 215OPCODE(FPUnordGreaterThan16, U1, F16, F16, )
215OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) 216OPCODE(FPUnordGreaterThan32, U1, F32, F32, )
216OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) 217OPCODE(FPUnordGreaterThan64, U1, F64, F64, )
217OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) 218OPCODE(FPOrdLessThanEqual16, U1, F16, F16, )
218OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) 219OPCODE(FPOrdLessThanEqual32, U1, F32, F32, )
219OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) 220OPCODE(FPOrdLessThanEqual64, U1, F64, F64, )
220OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) 221OPCODE(FPUnordLessThanEqual16, U1, F16, F16, )
221OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) 222OPCODE(FPUnordLessThanEqual32, U1, F32, F32, )
222OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) 223OPCODE(FPUnordLessThanEqual64, U1, F64, F64, )
223OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) 224OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, )
224OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) 225OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, )
225OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) 226OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, )
226OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) 227OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
227OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) 228OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
228OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) 229OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
229 230
230// Integer operations 231// Integer operations
231OPCODE(IAdd32, U32, U32, U32, ) 232OPCODE(IAdd32, U32, U32, U32, )
232OPCODE(IAdd64, U64, U64, U64, ) 233OPCODE(IAdd64, U64, U64, U64, )
233OPCODE(ISub32, U32, U32, U32, ) 234OPCODE(ISub32, U32, U32, U32, )
234OPCODE(ISub64, U64, U64, U64, ) 235OPCODE(ISub64, U64, U64, U64, )
235OPCODE(IMul32, U32, U32, U32, ) 236OPCODE(IMul32, U32, U32, U32, )
236OPCODE(INeg32, U32, U32, ) 237OPCODE(INeg32, U32, U32, )
237OPCODE(INeg64, U64, U64, ) 238OPCODE(INeg64, U64, U64, )
238OPCODE(IAbs32, U32, U32, ) 239OPCODE(IAbs32, U32, U32, )
239OPCODE(ShiftLeftLogical32, U32, U32, U32, ) 240OPCODE(ShiftLeftLogical32, U32, U32, U32, )
240OPCODE(ShiftLeftLogical64, U64, U64, U32, ) 241OPCODE(ShiftLeftLogical64, U64, U64, U32, )
241OPCODE(ShiftRightLogical32, U32, U32, U32, ) 242OPCODE(ShiftRightLogical32, U32, U32, U32, )
242OPCODE(ShiftRightLogical64, U64, U64, U32, ) 243OPCODE(ShiftRightLogical64, U64, U64, U32, )
243OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) 244OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
244OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) 245OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
245OPCODE(BitwiseAnd32, U32, U32, U32, ) 246OPCODE(BitwiseAnd32, U32, U32, U32, )
246OPCODE(BitwiseOr32, U32, U32, U32, ) 247OPCODE(BitwiseOr32, U32, U32, U32, )
247OPCODE(BitwiseXor32, U32, U32, U32, ) 248OPCODE(BitwiseXor32, U32, U32, U32, )
248OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) 249OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
249OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) 250OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
250OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) 251OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
251OPCODE(BitReverse32, U32, U32, ) 252OPCODE(BitReverse32, U32, U32, )
252OPCODE(BitCount32, U32, U32, ) 253OPCODE(BitCount32, U32, U32, )
253OPCODE(BitwiseNot32, U32, U32, ) 254OPCODE(BitwiseNot32, U32, U32, )
254 255
255OPCODE(FindSMsb32, U32, U32, ) 256OPCODE(FindSMsb32, U32, U32, )
256OPCODE(FindUMsb32, U32, U32, ) 257OPCODE(FindUMsb32, U32, U32, )
257OPCODE(SMin32, U32, U32, U32, ) 258OPCODE(SMin32, U32, U32, U32, )
258OPCODE(UMin32, U32, U32, U32, ) 259OPCODE(UMin32, U32, U32, U32, )
259OPCODE(SMax32, U32, U32, U32, ) 260OPCODE(SMax32, U32, U32, U32, )
260OPCODE(UMax32, U32, U32, U32, ) 261OPCODE(UMax32, U32, U32, U32, )
261OPCODE(SLessThan, U1, U32, U32, ) 262OPCODE(SLessThan, U1, U32, U32, )
262OPCODE(ULessThan, U1, U32, U32, ) 263OPCODE(ULessThan, U1, U32, U32, )
263OPCODE(IEqual, U1, U32, U32, ) 264OPCODE(IEqual, U1, U32, U32, )
264OPCODE(SLessThanEqual, U1, U32, U32, ) 265OPCODE(SLessThanEqual, U1, U32, U32, )
265OPCODE(ULessThanEqual, U1, U32, U32, ) 266OPCODE(ULessThanEqual, U1, U32, U32, )
266OPCODE(SGreaterThan, U1, U32, U32, ) 267OPCODE(SGreaterThan, U1, U32, U32, )
267OPCODE(UGreaterThan, U1, U32, U32, ) 268OPCODE(UGreaterThan, U1, U32, U32, )
268OPCODE(INotEqual, U1, U32, U32, ) 269OPCODE(INotEqual, U1, U32, U32, )
269OPCODE(SGreaterThanEqual, U1, U32, U32, ) 270OPCODE(SGreaterThanEqual, U1, U32, U32, )
270OPCODE(UGreaterThanEqual, U1, U32, U32, ) 271OPCODE(UGreaterThanEqual, U1, U32, U32, )
271 272
272// Logical operations 273// Logical operations
273OPCODE(LogicalOr, U1, U1, U1, ) 274OPCODE(LogicalOr, U1, U1, U1, )
274OPCODE(LogicalAnd, U1, U1, U1, ) 275OPCODE(LogicalAnd, U1, U1, U1, )
275OPCODE(LogicalXor, U1, U1, U1, ) 276OPCODE(LogicalXor, U1, U1, U1, )
276OPCODE(LogicalNot, U1, U1, ) 277OPCODE(LogicalNot, U1, U1, )
277 278
278// Conversion operations 279// Conversion operations
279OPCODE(ConvertS16F16, U32, F16, ) 280OPCODE(ConvertS16F16, U32, F16, )
280OPCODE(ConvertS16F32, U32, F32, ) 281OPCODE(ConvertS16F32, U32, F32, )
281OPCODE(ConvertS16F64, U32, F64, ) 282OPCODE(ConvertS16F64, U32, F64, )
282OPCODE(ConvertS32F16, U32, F16, ) 283OPCODE(ConvertS32F16, U32, F16, )
283OPCODE(ConvertS32F32, U32, F32, ) 284OPCODE(ConvertS32F32, U32, F32, )
284OPCODE(ConvertS32F64, U32, F64, ) 285OPCODE(ConvertS32F64, U32, F64, )
285OPCODE(ConvertS64F16, U64, F16, ) 286OPCODE(ConvertS64F16, U64, F16, )
286OPCODE(ConvertS64F32, U64, F32, ) 287OPCODE(ConvertS64F32, U64, F32, )
287OPCODE(ConvertS64F64, U64, F64, ) 288OPCODE(ConvertS64F64, U64, F64, )
288OPCODE(ConvertU16F16, U32, F16, ) 289OPCODE(ConvertU16F16, U32, F16, )
289OPCODE(ConvertU16F32, U32, F32, ) 290OPCODE(ConvertU16F32, U32, F32, )
290OPCODE(ConvertU16F64, U32, F64, ) 291OPCODE(ConvertU16F64, U32, F64, )
291OPCODE(ConvertU32F16, U32, F16, ) 292OPCODE(ConvertU32F16, U32, F16, )
292OPCODE(ConvertU32F32, U32, F32, ) 293OPCODE(ConvertU32F32, U32, F32, )
293OPCODE(ConvertU32F64, U32, F64, ) 294OPCODE(ConvertU32F64, U32, F64, )
294OPCODE(ConvertU64F16, U64, F16, ) 295OPCODE(ConvertU64F16, U64, F16, )
295OPCODE(ConvertU64F32, U64, F32, ) 296OPCODE(ConvertU64F32, U64, F32, )
296OPCODE(ConvertU64F64, U64, F64, ) 297OPCODE(ConvertU64F64, U64, F64, )
297OPCODE(ConvertU64U32, U64, U32, ) 298OPCODE(ConvertU64U32, U64, U32, )
298OPCODE(ConvertU32U64, U32, U64, ) 299OPCODE(ConvertU32U64, U32, U64, )
299OPCODE(ConvertF16F32, F16, F32, ) 300OPCODE(ConvertF16F32, F16, F32, )
300OPCODE(ConvertF32F16, F32, F16, ) 301OPCODE(ConvertF32F16, F32, F16, )
301OPCODE(ConvertF32F64, F32, F64, ) 302OPCODE(ConvertF32F64, F32, F64, )
302OPCODE(ConvertF64F32, F64, F32, ) 303OPCODE(ConvertF64F32, F64, F32, )
304OPCODE(ConvertF16S32, F16, U32, )
305OPCODE(ConvertF16S64, F16, U64, )
306OPCODE(ConvertF16U32, F16, U32, )
307OPCODE(ConvertF16U64, F16, U64, )
308OPCODE(ConvertF32S32, F32, U32, )
309OPCODE(ConvertF32S64, F32, U64, )
310OPCODE(ConvertF32U32, F32, U32, )
311OPCODE(ConvertF32U64, F32, U64, )
312OPCODE(ConvertF64S32, F64, U32, )
313OPCODE(ConvertF64S64, F64, U64, )
314OPCODE(ConvertF64U32, F64, U32, )
315OPCODE(ConvertF64U64, F64, U64, )
316
317// Image operations
318OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
319OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
320OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
321OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
322
323OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
324OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
325OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
326OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
327
328OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
329OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
330OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
331OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
index 771094eb9..8fea05f7b 100644
--- a/src/shader_recompiler/frontend/ir/reg.h
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -293,6 +293,17 @@ constexpr size_t NUM_REGS = 256;
293 return reg + (-num); 293 return reg + (-num);
294} 294}
295 295
296[[nodiscard]] constexpr Reg operator++(Reg& reg) {
297 reg = reg + 1;
298 return reg;
299}
300
301[[nodiscard]] constexpr Reg operator++(Reg& reg, int) {
302 const Reg copy{reg};
303 reg = reg + 1;
304 return copy;
305}
306
296[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { 307[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
297 return static_cast<size_t>(reg); 308 return static_cast<size_t>(reg);
298} 309}
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 9b7e1480b..3602883d6 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -75,6 +75,7 @@ private:
75 f64 imm_f64; 75 f64 imm_f64;
76 }; 76 };
77}; 77};
78static_assert(std::is_trivially_copyable_v<Value>);
78 79
79template <IR::Type type_> 80template <IR::Type type_>
80class TypedValue : public Value { 81class TypedValue : public Value {
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
index 5d0b91598..f2a2ff331 100644
--- a/src/shader_recompiler/frontend/maxwell/maxwell.inc
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -249,8 +249,8 @@ INST(SULD, "SULD", "1110 1011 000- ----")
249INST(SURED, "SURED", "1110 1011 010- ----") 249INST(SURED, "SURED", "1110 1011 010- ----")
250INST(SUST, "SUST", "1110 1011 001- ----") 250INST(SUST, "SUST", "1110 1011 001- ----")
251INST(SYNC, "SYNC", "1111 0000 1111 1---") 251INST(SYNC, "SYNC", "1111 0000 1111 1---")
252INST(TEX, "TEX", "1100 00-- --11 1---") 252INST(TEX, "TEX", "1100 0--- ---- ----")
253INST(TEX_b, "TEX (b)", "1101 1110 1011 1---") 253INST(TEX_b, "TEX (b)", "1101 1110 10-- ----")
254INST(TEXS, "TEXS", "1101 -00- ---- ----") 254INST(TEXS, "TEXS", "1101 -00- ---- ----")
255INST(TLD, "TLD", "1101 1100 --11 1---") 255INST(TLD, "TLD", "1101 1100 --11 1---")
256INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") 256INST(TLD_b, "TLD (b)", "1101 1101 --11 1---")
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index dbfc04f75..b270bbccd 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -62,6 +62,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
62 Optimization::SsaRewritePass(function.post_order_blocks); 62 Optimization::SsaRewritePass(function.post_order_blocks);
63 } 63 }
64 Optimization::GlobalMemoryToStorageBufferPass(program); 64 Optimization::GlobalMemoryToStorageBufferPass(program);
65 Optimization::TexturePass(env, program);
65 for (IR::Function& function : functions) { 66 for (IR::Function& function : functions) {
66 Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); 67 Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function);
67 Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); 68 Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index fc6030e04..ff429c126 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -585,14 +585,6 @@ void TranslatorVisitor::SYNC(u64) {
585 ThrowNotImplemented(Opcode::SYNC); 585 ThrowNotImplemented(Opcode::SYNC);
586} 586}
587 587
588void TranslatorVisitor::TEX(u64) {
589 ThrowNotImplemented(Opcode::TEX);
590}
591
592void TranslatorVisitor::TEX_b(u64) {
593 ThrowNotImplemented(Opcode::TEX_b);
594}
595
596void TranslatorVisitor::TEXS(u64) { 588void TranslatorVisitor::TEXS(u64) {
597 ThrowNotImplemented(Opcode::TEXS); 589 ThrowNotImplemented(Opcode::TEXS);
598} 590}
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp
new file mode 100644
index 000000000..98d9f4c64
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp
@@ -0,0 +1,232 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Blod : u64 {
15 None,
16 LZ,
17 LB,
18 LL,
19 INVALIDBLOD4,
20 INVALIDBLOD5,
21 LBA,
22 LLA,
23};
24
25enum class TextureType : u64 {
26 _1D,
27 ARRAY_1D,
28 _2D,
29 ARRAY_2D,
30 _3D,
31 ARRAY_3D,
32 CUBE,
33 ARRAY_CUBE,
34};
35
36Shader::TextureType GetType(TextureType type, bool dc) {
37 switch (type) {
38 case TextureType::_1D:
39 return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D;
40 case TextureType::ARRAY_1D:
41 return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D;
42 case TextureType::_2D:
43 return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D;
44 case TextureType::ARRAY_2D:
45 return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D;
46 case TextureType::_3D:
47 return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D;
48 case TextureType::ARRAY_3D:
49 throw NotImplementedException("3D array texture type");
50 case TextureType::CUBE:
51 return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube;
52 case TextureType::ARRAY_CUBE:
53 return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube;
54 }
55 throw NotImplementedException("Invalid texture type {}", type);
56}
57
58IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
59 const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }};
60 switch (type) {
61 case TextureType::_1D:
62 return v.F(reg);
63 case TextureType::ARRAY_1D:
64 return v.ir.CompositeConstruct(read_array(), v.F(reg + 1));
65 case TextureType::_2D:
66 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
67 case TextureType::ARRAY_2D:
68 return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2));
69 case TextureType::_3D:
70 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
71 case TextureType::ARRAY_3D:
72 throw NotImplementedException("3D array texture type");
73 case TextureType::CUBE:
74 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
75 case TextureType::ARRAY_CUBE:
76 return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
77 }
78 throw NotImplementedException("Invalid texture type {}", type);
79}
80
81IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
82 switch (blod) {
83 case Blod::None:
84 return v.ir.Imm32(0.0f);
85 case Blod::LZ:
86 return v.ir.Imm32(0.0f);
87 case Blod::LB:
88 case Blod::LL:
89 case Blod::LBA:
90 case Blod::LLA:
91 return v.F(reg++);
92 case Blod::INVALIDBLOD4:
93 case Blod::INVALIDBLOD5:
94 break;
95 }
96 throw NotImplementedException("Invalid blod {}", blod);
97}
98
99IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
100 const IR::U32 value{v.X(reg++)};
101 switch (type) {
102 case TextureType::_1D:
103 case TextureType::ARRAY_1D:
104 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4));
105 case TextureType::_2D:
106 case TextureType::ARRAY_2D:
107 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)),
108 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)));
109 case TextureType::_3D:
110 case TextureType::ARRAY_3D:
111 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)),
112 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)),
113 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4)));
114 case TextureType::CUBE:
115 case TextureType::ARRAY_CUBE:
116 throw NotImplementedException("Illegal offset on CUBE sample");
117 }
118 throw NotImplementedException("Invalid texture type {}", type);
119}
120
121bool HasExplicitLod(Blod blod) {
122 switch (blod) {
123 case Blod::LL:
124 case Blod::LLA:
125 case Blod::LZ:
126 return true;
127 default:
128 return false;
129 }
130}
131
132void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
133 std::optional<u32> cbuf_offset) {
134 union {
135 u64 raw;
136 BitField<35, 1, u64> ndv;
137 BitField<49, 1, u64> nodep;
138 BitField<50, 1, u64> dc;
139 BitField<51, 3, IR::Pred> sparse_pred;
140 BitField<0, 8, IR::Reg> dest_reg;
141 BitField<8, 8, IR::Reg> coord_reg;
142 BitField<20, 8, IR::Reg> meta_reg;
143 BitField<28, 3, TextureType> type;
144 BitField<31, 4, u64> mask;
145 } const tex{insn};
146
147 if (lc) {
148 throw NotImplementedException("LC");
149 }
150 const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
151
152 IR::Reg meta_reg{tex.meta_reg};
153 IR::Value handle;
154 IR::Value offset;
155 IR::F32 dref;
156 IR::F32 lod_clamp;
157 if (cbuf_offset) {
158 handle = v.ir.Imm32(*cbuf_offset);
159 } else {
160 handle = v.X(meta_reg++);
161 }
162 const IR::F32 lod{MakeLod(v, meta_reg, blod)};
163 if (aoffi) {
164 offset = MakeOffset(v, meta_reg, tex.type);
165 }
166 if (tex.dc != 0) {
167 dref = v.F(meta_reg++);
168 }
169 IR::TextureInstInfo info{};
170 info.type.Assign(GetType(tex.type, tex.dc != 0));
171 info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
172 info.has_lod_clamp.Assign(lc ? 1 : 0);
173
174 const IR::Value sample{[&]() -> IR::Value {
175 if (tex.dc == 0) {
176 if (HasExplicitLod(blod)) {
177 return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info);
178 } else {
179 return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
180 }
181 }
182 if (HasExplicitLod(blod)) {
183 return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp,
184 info);
185 } else {
186 return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
187 info);
188 }
189 }()};
190
191 for (int element = 0; element < 4; ++element) {
192 if (((tex.mask >> element) & 1) == 0) {
193 continue;
194 }
195 IR::F32 value;
196 if (tex.dc != 0) {
197 value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
198 } else {
199 value = IR::F32{v.ir.CompositeExtract(sample, element)};
200 }
201 v.F(tex.dest_reg + element, value);
202 }
203 if (tex.sparse_pred != IR::Pred::PT) {
204 v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
205 }
206}
207} // Anonymous namespace
208
209void TranslatorVisitor::TEX(u64 insn) {
210 union {
211 u64 raw;
212 BitField<54, 1, u64> aoffi;
213 BitField<55, 3, Blod> blod;
214 BitField<58, 1, u64> lc;
215 BitField<36, 13, u64> cbuf_offset;
216 } const tex{insn};
217
218 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset));
219}
220
221void TranslatorVisitor::TEX_b(u64 insn) {
222 union {
223 u64 raw;
224 BitField<36, 1, u64> aoffi;
225 BitField<37, 3, Blod> blod;
226 BitField<40, 1, u64> lc;
227 } const tex{insn};
228
229 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
230}
231
232} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 6662ef4cd..960beadd4 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -82,6 +82,25 @@ void VisitUsages(Info& info, IR::Inst& inst) {
82 throw NotImplementedException("Constant buffer with non-immediate index"); 82 throw NotImplementedException("Constant buffer with non-immediate index");
83 } 83 }
84 break; 84 break;
85 case IR::Opcode::BindlessImageSampleImplicitLod:
86 case IR::Opcode::BindlessImageSampleExplicitLod:
87 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
88 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
89 case IR::Opcode::BoundImageSampleImplicitLod:
90 case IR::Opcode::BoundImageSampleExplicitLod:
91 case IR::Opcode::BoundImageSampleDrefImplicitLod:
92 case IR::Opcode::BoundImageSampleDrefExplicitLod:
93 case IR::Opcode::ImageSampleImplicitLod:
94 case IR::Opcode::ImageSampleExplicitLod:
95 case IR::Opcode::ImageSampleDrefImplicitLod:
96 case IR::Opcode::ImageSampleDrefExplicitLod: {
97 const TextureType type{inst.Flags<IR::TextureInstInfo>().type};
98 info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D ||
99 type == TextureType::Shadow1D || type == TextureType::ShadowArray1D;
100 info.uses_sparse_residency |=
101 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
102 break;
103 }
85 default: 104 default:
86 break; 105 break;
87 } 106 }
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 965e52135..2625c0bb2 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -226,6 +226,7 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value,
226 } 226 }
227 // Reversed loops are more likely to find the right result 227 // Reversed loops are more likely to find the right result
228 for (size_t arg = inst->NumArgs(); arg--;) { 228 for (size_t arg = inst->NumArgs(); arg--;) {
229 IR::Block* inst_block{block};
229 if (inst->Opcode() == IR::Opcode::Phi) { 230 if (inst->Opcode() == IR::Opcode::Phi) {
230 // If we are going through a phi node, mark the current block as visited 231 // If we are going through a phi node, mark the current block as visited
231 visited.insert(block); 232 visited.insert(block);
@@ -235,15 +236,11 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value,
235 // Already visited, skip 236 // Already visited, skip
236 continue; 237 continue;
237 } 238 }
238 const std::optional storage_buffer{Track(phi_block, inst->Arg(arg), bias, visited)}; 239 inst_block = phi_block;
239 if (storage_buffer) { 240 }
240 return *storage_buffer; 241 const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)};
241 } 242 if (storage_buffer) {
242 } else { 243 return *storage_buffer;
243 const std::optional storage_buffer{Track(block, inst->Arg(arg), bias, visited)};
244 if (storage_buffer) {
245 return *storage_buffer;
246 }
247 } 244 }
248 } 245 }
249 return std::nullopt; 246 return std::nullopt;
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 38106308c..3b7e7306b 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -6,6 +6,7 @@
6 6
7#include <span> 7#include <span>
8 8
9#include "shader_recompiler/environment.h"
9#include "shader_recompiler/frontend/ir/basic_block.h" 10#include "shader_recompiler/frontend/ir/basic_block.h"
10#include "shader_recompiler/frontend/ir/function.h" 11#include "shader_recompiler/frontend/ir/function.h"
11#include "shader_recompiler/frontend/ir/program.h" 12#include "shader_recompiler/frontend/ir/program.h"
@@ -26,6 +27,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program);
26void IdentityRemovalPass(IR::Function& function); 27void IdentityRemovalPass(IR::Function& function);
27void LowerFp16ToFp32(IR::Program& program); 28void LowerFp16ToFp32(IR::Program& program);
28void SsaRewritePass(std::span<IR::Block* const> post_order_blocks); 29void SsaRewritePass(std::span<IR::Block* const> post_order_blocks);
30void TexturePass(Environment& env, IR::Program& program);
29void VerificationPass(const IR::Function& function); 31void VerificationPass(const IR::Function& function);
30 32
31} // namespace Shader::Optimization 33} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
new file mode 100644
index 000000000..80e4ad6a9
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -0,0 +1,199 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include <boost/container/flat_set.hpp>
8#include <boost/container/small_vector.hpp>
9
10#include "shader_recompiler/environment.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/ir_opt/passes.h"
14#include "shader_recompiler/shader_info.h"
15
16namespace Shader::Optimization {
17namespace {
18struct ConstBufferAddr {
19 u32 index;
20 u32 offset;
21};
22
23struct TextureInst {
24 ConstBufferAddr cbuf;
25 IR::Inst* inst;
26 IR::Block* block;
27};
28
29using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
30
31using VisitedBlocks = boost::container::flat_set<IR::Block*, std::less<IR::Block*>,
32 boost::container::small_vector<IR::Block*, 2>>;
33
34IR::Opcode IndexedInstruction(const IR::Inst& inst) {
35 switch (inst.Opcode()) {
36 case IR::Opcode::BindlessImageSampleImplicitLod:
37 case IR::Opcode::BoundImageSampleImplicitLod:
38 return IR::Opcode::ImageSampleImplicitLod;
39 case IR::Opcode::BoundImageSampleExplicitLod:
40 case IR::Opcode::BindlessImageSampleExplicitLod:
41 return IR::Opcode::ImageSampleExplicitLod;
42 case IR::Opcode::BoundImageSampleDrefImplicitLod:
43 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
44 return IR::Opcode::ImageSampleDrefImplicitLod;
45 case IR::Opcode::BoundImageSampleDrefExplicitLod:
46 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
47 return IR::Opcode::ImageSampleDrefExplicitLod;
48 default:
49 return IR::Opcode::Void;
50 }
51}
52
53bool IsBindless(const IR::Inst& inst) {
54 switch (inst.Opcode()) {
55 case IR::Opcode::BindlessImageSampleImplicitLod:
56 case IR::Opcode::BindlessImageSampleExplicitLod:
57 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
58 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
59 return true;
60 case IR::Opcode::BoundImageSampleImplicitLod:
61 case IR::Opcode::BoundImageSampleExplicitLod:
62 case IR::Opcode::BoundImageSampleDrefImplicitLod:
63 case IR::Opcode::BoundImageSampleDrefExplicitLod:
64 return false;
65 default:
66 throw InvalidArgument("Invalid opcode {}", inst.Opcode());
67 }
68}
69
70bool IsTextureInstruction(const IR::Inst& inst) {
71 return IndexedInstruction(inst) != IR::Opcode::Void;
72}
73
74std::optional<ConstBufferAddr> Track(IR::Block* block, const IR::Value& value,
75 VisitedBlocks& visited) {
76 if (value.IsImmediate()) {
77 // Immediates can't be a storage buffer
78 return std::nullopt;
79 }
80 const IR::Inst* const inst{value.InstRecursive()};
81 if (inst->Opcode() == IR::Opcode::GetCbuf) {
82 const IR::Value index{inst->Arg(0)};
83 const IR::Value offset{inst->Arg(1)};
84 if (!index.IsImmediate()) {
85 // Reading a bindless texture from variable indices is valid
86 // but not supported here at the moment
87 return std::nullopt;
88 }
89 if (!offset.IsImmediate()) {
90 // TODO: Support arrays of textures
91 return std::nullopt;
92 }
93 return ConstBufferAddr{
94 .index{index.U32()},
95 .offset{offset.U32()},
96 };
97 }
98 // Reversed loops are more likely to find the right result
99 for (size_t arg = inst->NumArgs(); arg--;) {
100 IR::Block* inst_block{block};
101 if (inst->Opcode() == IR::Opcode::Phi) {
102 // If we are going through a phi node, mark the current block as visited
103 visited.insert(block);
104 // and skip already visited blocks to avoid looping forever
105 IR::Block* const phi_block{inst->PhiBlock(arg)};
106 if (visited.contains(phi_block)) {
107 // Already visited, skip
108 continue;
109 }
110 inst_block = phi_block;
111 }
112 const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), visited)};
113 if (storage_buffer) {
114 return *storage_buffer;
115 }
116 }
117 return std::nullopt;
118}
119
120TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
121 ConstBufferAddr addr;
122 if (IsBindless(inst)) {
123 VisitedBlocks visited;
124 const std::optional<ConstBufferAddr> track_addr{Track(block, IR::Value{&inst}, visited)};
125 if (!track_addr) {
126 throw NotImplementedException("Failed to track bindless texture constant buffer");
127 }
128 addr = *track_addr;
129 } else {
130 addr = ConstBufferAddr{
131 .index{env.TextureBoundBuffer()},
132 .offset{inst.Arg(0).U32()},
133 };
134 }
135 return TextureInst{
136 .cbuf{addr},
137 .inst{&inst},
138 .block{block},
139 };
140}
141
142class Descriptors {
143public:
144 explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {}
145
146 u32 Add(const TextureDescriptor& descriptor) {
147 // TODO: Handle arrays
148 auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) {
149 return descriptor.cbuf_index == existing.cbuf_index &&
150 descriptor.cbuf_offset == existing.cbuf_offset &&
151 descriptor.type == existing.type;
152 })};
153 if (it != descriptors.end()) {
154 return static_cast<u32>(std::distance(descriptors.begin(), it));
155 }
156 descriptors.push_back(descriptor);
157 return static_cast<u32>(descriptors.size()) - 1;
158 }
159
160private:
161 TextureDescriptors& descriptors;
162};
163} // Anonymous namespace
164
165void TexturePass(Environment& env, IR::Program& program) {
166 TextureInstVector to_replace;
167 for (IR::Function& function : program.functions) {
168 for (IR::Block* const block : function.post_order_blocks) {
169 for (IR::Inst& inst : block->Instructions()) {
170 if (!IsTextureInstruction(inst)) {
171 continue;
172 }
173 to_replace.push_back(MakeInst(env, block, inst));
174 }
175 }
176 }
177 // Sort instructions to visit textures by constant buffer index, then by offset
178 std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) {
179 return lhs.cbuf.offset < rhs.cbuf.offset;
180 });
181 std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) {
182 return lhs.cbuf.index < rhs.cbuf.index;
183 });
184 Descriptors descriptors{program.info.texture_descriptors};
185 for (TextureInst& texture_inst : to_replace) {
186 // TODO: Handle arrays
187 IR::Inst* const inst{texture_inst.inst};
188 const u32 index{descriptors.Add(TextureDescriptor{
189 .type{inst->Flags<IR::TextureInstInfo>().type},
190 .cbuf_index{texture_inst.cbuf.index},
191 .cbuf_offset{texture_inst.cbuf.offset},
192 .count{1},
193 })};
194 inst->ReplaceOpcode(IndexedInstruction(*inst));
195 inst->SetArg(0, IR::Value{index});
196 }
197}
198
199} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 8766bf13e..103a2f0b4 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -8,25 +8,51 @@
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10 10
11#include <boost/container/small_vector.hpp>
11#include <boost/container/static_vector.hpp> 12#include <boost/container/static_vector.hpp>
12 13
13namespace Shader { 14namespace Shader {
14 15
// Dimensionality variant of a sampled texture as used by texture descriptors.
// Color* entries are regular color samples; Shadow* entries are presumably the
// depth-compare (shadow sampler) forms of the same dimensionalities — naming
// only, TODO confirm against the SPIR-V emitter.
enum class TextureType : u32 {
    Color1D,
    ColorArray1D,
    Color2D,
    ColorArray2D,
    Color3D,
    ColorCube,
    ColorArrayCube,
    Shadow1D,
    ShadowArray1D,
    Shadow2D,
    ShadowArray2D,
    Shadow3D,
    ShadowCube,
    ShadowArrayCube,
};
32
// One texture binding discovered by the texture optimization pass. The pass
// deduplicates on (cbuf_index, cbuf_offset, type); the handle appears to be
// read from constant buffer cbuf_index at offset cbuf_offset.
struct TextureDescriptor {
    TextureType type; // Dimensionality/shadow variant of the texture
    u32 cbuf_index;   // Constant buffer holding the texture handle
    u32 cbuf_offset;  // Offset of the handle within that constant buffer
    u32 count;        // Element count; the texture pass currently always stores 1 (arrays TODO)
};
// small_vector keeps up to 12 descriptors inline before heap-allocating.
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
40
// Constant buffer binding used by the shader.
struct ConstantBufferDescriptor {
    u32 index; // Constant buffer slot
    u32 count; // Number of elements — presumably consecutive slots, TODO confirm
};
45
// Storage buffer binding; fields mirror TextureDescriptor's constant-buffer
// addressing — presumably the buffer address is read from constant buffer
// cbuf_index at cbuf_offset (see the global-memory-to-storage-buffer pass).
struct StorageBufferDescriptor {
    u32 cbuf_index;  // Constant buffer the address is read from
    u32 cbuf_offset; // Offset of the address within that constant buffer
    u32 count;       // Number of elements — TODO confirm semantics
};
51
15struct Info { 52struct Info {
16 static constexpr size_t MAX_CBUFS{18}; 53 static constexpr size_t MAX_CBUFS{18};
17 static constexpr size_t MAX_SSBOS{16}; 54 static constexpr size_t MAX_SSBOS{16};
18 55
19 struct ConstantBufferDescriptor {
20 u32 index;
21 u32 count;
22 };
23
24 struct StorageBufferDescriptor {
25 u32 cbuf_index;
26 u32 cbuf_offset;
27 u32 count;
28 };
29
30 bool uses_workgroup_id{}; 56 bool uses_workgroup_id{};
31 bool uses_local_invocation_id{}; 57 bool uses_local_invocation_id{};
32 bool uses_fp16{}; 58 bool uses_fp16{};
@@ -35,12 +61,16 @@ struct Info {
35 bool uses_fp16_denorms_preserve{}; 61 bool uses_fp16_denorms_preserve{};
36 bool uses_fp32_denorms_flush{}; 62 bool uses_fp32_denorms_flush{};
37 bool uses_fp32_denorms_preserve{}; 63 bool uses_fp32_denorms_preserve{};
64 bool uses_image_1d{};
65 bool uses_sampled_1d{};
66 bool uses_sparse_residency{};
38 67
39 u32 constant_buffer_mask{}; 68 u32 constant_buffer_mask{};
40 69
41 boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> 70 boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
42 constant_buffer_descriptors; 71 constant_buffer_descriptors;
43 boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; 72 boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors;
73 TextureDescriptors texture_descriptors;
44}; 74};
45 75
46} // namespace Shader 76} // namespace Shader