diff options
Diffstat (limited to 'src/shader_recompiler')
29 files changed, 1378 insertions, 341 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index cc38b28ed..fa268d38f 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -9,6 +9,7 @@ add_library(shader_recompiler STATIC | |||
| 9 | backend/spirv/emit_spirv_control_flow.cpp | 9 | backend/spirv/emit_spirv_control_flow.cpp |
| 10 | backend/spirv/emit_spirv_convert.cpp | 10 | backend/spirv/emit_spirv_convert.cpp |
| 11 | backend/spirv/emit_spirv_floating_point.cpp | 11 | backend/spirv/emit_spirv_floating_point.cpp |
| 12 | backend/spirv/emit_spirv_image.cpp | ||
| 12 | backend/spirv/emit_spirv_integer.cpp | 13 | backend/spirv/emit_spirv_integer.cpp |
| 13 | backend/spirv/emit_spirv_logical.cpp | 14 | backend/spirv/emit_spirv_logical.cpp |
| 14 | backend/spirv/emit_spirv_memory.cpp | 15 | backend/spirv/emit_spirv_memory.cpp |
| @@ -100,6 +101,7 @@ add_library(shader_recompiler STATIC | |||
| 100 | frontend/maxwell/translate/impl/predicate_set_predicate.cpp | 101 | frontend/maxwell/translate/impl/predicate_set_predicate.cpp |
| 101 | frontend/maxwell/translate/impl/predicate_set_register.cpp | 102 | frontend/maxwell/translate/impl/predicate_set_register.cpp |
| 102 | frontend/maxwell/translate/impl/select_source_with_predicate.cpp | 103 | frontend/maxwell/translate/impl/select_source_with_predicate.cpp |
| 104 | frontend/maxwell/translate/impl/texture_sample.cpp | ||
| 103 | frontend/maxwell/translate/translate.cpp | 105 | frontend/maxwell/translate/translate.cpp |
| 104 | frontend/maxwell/translate/translate.h | 106 | frontend/maxwell/translate/translate.h |
| 105 | ir_opt/collect_shader_info_pass.cpp | 107 | ir_opt/collect_shader_info_pass.cpp |
| @@ -110,6 +112,7 @@ add_library(shader_recompiler STATIC | |||
| 110 | ir_opt/lower_fp16_to_fp32.cpp | 112 | ir_opt/lower_fp16_to_fp32.cpp |
| 111 | ir_opt/passes.h | 113 | ir_opt/passes.h |
| 112 | ir_opt/ssa_rewrite_pass.cpp | 114 | ir_opt/ssa_rewrite_pass.cpp |
| 115 | ir_opt/texture_pass.cpp | ||
| 113 | ir_opt/verification_pass.cpp | 116 | ir_opt/verification_pass.cpp |
| 114 | object_pool.h | 117 | object_pool.h |
| 115 | profile.h | 118 | profile.h |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index d2dbd56d4..21900d387 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -12,6 +12,43 @@ | |||
| 12 | #include "shader_recompiler/backend/spirv/emit_context.h" | 12 | #include "shader_recompiler/backend/spirv/emit_context.h" |
| 13 | 13 | ||
| 14 | namespace Shader::Backend::SPIRV { | 14 | namespace Shader::Backend::SPIRV { |
| 15 | namespace { | ||
| 16 | Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { | ||
| 17 | const spv::ImageFormat format{spv::ImageFormat::Unknown}; | ||
| 18 | const Id type{ctx.F32[1]}; | ||
| 19 | switch (desc.type) { | ||
| 20 | case TextureType::Color1D: | ||
| 21 | return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 1, format); | ||
| 22 | case TextureType::ColorArray1D: | ||
| 23 | return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 1, format); | ||
| 24 | case TextureType::Color2D: | ||
| 25 | return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 1, format); | ||
| 26 | case TextureType::ColorArray2D: | ||
| 27 | return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 1, format); | ||
| 28 | case TextureType::Color3D: | ||
| 29 | return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 1, format); | ||
| 30 | case TextureType::ColorCube: | ||
| 31 | return ctx.TypeImage(type, spv::Dim::Cube, false, false, false, 1, format); | ||
| 32 | case TextureType::ColorArrayCube: | ||
| 33 | return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format); | ||
| 34 | case TextureType::Shadow1D: | ||
| 35 | return ctx.TypeImage(type, spv::Dim::Dim1D, true, false, false, 1, format); | ||
| 36 | case TextureType::ShadowArray1D: | ||
| 37 | return ctx.TypeImage(type, spv::Dim::Dim1D, true, true, false, 1, format); | ||
| 38 | case TextureType::Shadow2D: | ||
| 39 | return ctx.TypeImage(type, spv::Dim::Dim2D, true, false, false, 1, format); | ||
| 40 | case TextureType::ShadowArray2D: | ||
| 41 | return ctx.TypeImage(type, spv::Dim::Dim2D, true, true, false, 1, format); | ||
| 42 | case TextureType::Shadow3D: | ||
| 43 | return ctx.TypeImage(type, spv::Dim::Dim3D, true, false, false, 1, format); | ||
| 44 | case TextureType::ShadowCube: | ||
| 45 | return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format); | ||
| 46 | case TextureType::ShadowArrayCube: | ||
| 47 | return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format); | ||
| 48 | } | ||
| 49 | throw InvalidArgument("Invalid texture type {}", desc.type); | ||
| 50 | } | ||
| 51 | } // Anonymous namespace | ||
| 15 | 52 | ||
| 16 | void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { | 53 | void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { |
| 17 | defs[0] = sirit_ctx.Name(base_type, name); | 54 | defs[0] = sirit_ctx.Name(base_type, name); |
| @@ -35,6 +72,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program) | |||
| 35 | u32 binding{}; | 72 | u32 binding{}; |
| 36 | DefineConstantBuffers(program.info, binding); | 73 | DefineConstantBuffers(program.info, binding); |
| 37 | DefineStorageBuffers(program.info, binding); | 74 | DefineStorageBuffers(program.info, binding); |
| 75 | DefineTextures(program.info, binding); | ||
| 38 | 76 | ||
| 39 | DefineLabels(program); | 77 | DefineLabels(program); |
| 40 | } | 78 | } |
| @@ -46,6 +84,10 @@ Id EmitContext::Def(const IR::Value& value) { | |||
| 46 | return value.Inst()->Definition<Id>(); | 84 | return value.Inst()->Definition<Id>(); |
| 47 | } | 85 | } |
| 48 | switch (value.Type()) { | 86 | switch (value.Type()) { |
| 87 | case IR::Type::Void: | ||
| 88 | // Void instructions are used for optional arguments (e.g. texture offsets) | ||
| 89 | // They are not meant to be used in the SPIR-V module | ||
| 90 | return Id{}; | ||
| 49 | case IR::Type::U1: | 91 | case IR::Type::U1: |
| 50 | return value.U1() ? true_value : false_value; | 92 | return value.U1() ? true_value : false_value; |
| 51 | case IR::Type::U32: | 93 | case IR::Type::U32: |
| @@ -122,7 +164,7 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { | |||
| 122 | uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); | 164 | uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); |
| 123 | 165 | ||
| 124 | u32 index{}; | 166 | u32 index{}; |
| 125 | for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { | 167 | for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { |
| 126 | const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; | 168 | const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; |
| 127 | Decorate(id, spv::Decoration::Binding, binding); | 169 | Decorate(id, spv::Decoration::Binding, binding); |
| 128 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | 170 | Decorate(id, spv::Decoration::DescriptorSet, 0U); |
| @@ -152,7 +194,7 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | |||
| 152 | storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); | 194 | storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); |
| 153 | 195 | ||
| 154 | u32 index{}; | 196 | u32 index{}; |
| 155 | for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { | 197 | for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { |
| 156 | const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; | 198 | const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; |
| 157 | Decorate(id, spv::Decoration::Binding, binding); | 199 | Decorate(id, spv::Decoration::Binding, binding); |
| 158 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | 200 | Decorate(id, spv::Decoration::DescriptorSet, 0U); |
| @@ -163,6 +205,29 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | |||
| 163 | } | 205 | } |
| 164 | } | 206 | } |
| 165 | 207 | ||
| 208 | void EmitContext::DefineTextures(const Info& info, u32& binding) { | ||
| 209 | textures.reserve(info.texture_descriptors.size()); | ||
| 210 | for (const TextureDescriptor& desc : info.texture_descriptors) { | ||
| 211 | if (desc.count != 1) { | ||
| 212 | throw NotImplementedException("Array of textures"); | ||
| 213 | } | ||
| 214 | const Id type{TypeSampledImage(ImageType(*this, desc))}; | ||
| 215 | const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, type)}; | ||
| 216 | const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; | ||
| 217 | Decorate(id, spv::Decoration::Binding, binding); | ||
| 218 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 219 | Name(id, fmt::format("tex{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); | ||
| 220 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 221 | // TODO: Pass count info | ||
| 222 | textures.push_back(TextureDefinition{ | ||
| 223 | .id{id}, | ||
| 224 | .type{type}, | ||
| 225 | }); | ||
| 226 | } | ||
| 227 | binding += desc.count; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 166 | void EmitContext::DefineLabels(IR::Program& program) { | 231 | void EmitContext::DefineLabels(IR::Program& program) { |
| 167 | for (const IR::Function& function : program.functions) { | 232 | for (const IR::Function& function : program.functions) { |
| 168 | for (IR::Block* const block : function.blocks) { | 233 | for (IR::Block* const block : function.blocks) { |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index d20cf387e..8b3109eb8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h | |||
| @@ -29,6 +29,11 @@ private: | |||
| 29 | std::array<Id, 4> defs{}; | 29 | std::array<Id, 4> defs{}; |
| 30 | }; | 30 | }; |
| 31 | 31 | ||
| 32 | struct TextureDefinition { | ||
| 33 | Id id; | ||
| 34 | Id type; | ||
| 35 | }; | ||
| 36 | |||
| 32 | class EmitContext final : public Sirit::Module { | 37 | class EmitContext final : public Sirit::Module { |
| 33 | public: | 38 | public: |
| 34 | explicit EmitContext(const Profile& profile, IR::Program& program); | 39 | explicit EmitContext(const Profile& profile, IR::Program& program); |
| @@ -56,6 +61,7 @@ public: | |||
| 56 | 61 | ||
| 57 | std::array<Id, Info::MAX_CBUFS> cbufs{}; | 62 | std::array<Id, Info::MAX_CBUFS> cbufs{}; |
| 58 | std::array<Id, Info::MAX_SSBOS> ssbos{}; | 63 | std::array<Id, Info::MAX_SSBOS> ssbos{}; |
| 64 | std::vector<TextureDefinition> textures; | ||
| 59 | 65 | ||
| 60 | Id workgroup_id{}; | 66 | Id workgroup_id{}; |
| 61 | Id local_invocation_id{}; | 67 | Id local_invocation_id{}; |
| @@ -66,6 +72,7 @@ private: | |||
| 66 | void DefineSpecialVariables(const Info& info); | 72 | void DefineSpecialVariables(const Info& info); |
| 67 | void DefineConstantBuffers(const Info& info, u32& binding); | 73 | void DefineConstantBuffers(const Info& info, u32& binding); |
| 68 | void DefineStorageBuffers(const Info& info, u32& binding); | 74 | void DefineStorageBuffers(const Info& info, u32& binding); |
| 75 | void DefineTextures(const Info& info, u32& binding); | ||
| 69 | void DefineLabels(IR::Program& program); | 76 | void DefineLabels(IR::Program& program); |
| 70 | }; | 77 | }; |
| 71 | 78 | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 8097fe82d..a94e9cb2d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp | |||
| @@ -221,6 +221,14 @@ std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program | |||
| 221 | workgroup_size[2]); | 221 | workgroup_size[2]); |
| 222 | 222 | ||
| 223 | SetupDenormControl(profile, program, ctx, func); | 223 | SetupDenormControl(profile, program, ctx, func); |
| 224 | if (info.uses_sampled_1d) { | ||
| 225 | ctx.AddCapability(spv::Capability::Sampled1D); | ||
| 226 | } | ||
| 227 | if (info.uses_sparse_residency) { | ||
| 228 | ctx.AddCapability(spv::Capability::SparseResidency); | ||
| 229 | } | ||
| 230 | // TODO: Track this usage | ||
| 231 | ctx.AddCapability(spv::Capability::ImageGatherExtended); | ||
| 224 | 232 | ||
| 225 | return ctx.Assemble(); | 233 | return ctx.Assemble(); |
| 226 | } | 234 | } |
| @@ -259,4 +267,8 @@ void EmitGetOverflowFromOp(EmitContext&) { | |||
| 259 | throw LogicError("Unreachable instruction"); | 267 | throw LogicError("Unreachable instruction"); |
| 260 | } | 268 | } |
| 261 | 269 | ||
| 270 | void EmitGetSparseFromOp(EmitContext&) { | ||
| 271 | throw LogicError("Unreachable instruction"); | ||
| 272 | } | ||
| 273 | |||
| 262 | } // namespace Shader::Backend::SPIRV | 274 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 92387ca28..69698c478 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -83,7 +83,8 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Va | |||
| 83 | Id value); | 83 | Id value); |
| 84 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | 84 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, |
| 85 | Id value); | 85 | Id value); |
| 86 | void EmitWriteStorage128(EmitContext& ctx); | 86 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, |
| 87 | Id value); | ||
| 87 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); | 88 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); |
| 88 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | 89 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); |
| 89 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | 90 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); |
| @@ -145,6 +146,7 @@ void EmitGetZeroFromOp(EmitContext& ctx); | |||
| 145 | void EmitGetSignFromOp(EmitContext& ctx); | 146 | void EmitGetSignFromOp(EmitContext& ctx); |
| 146 | void EmitGetCarryFromOp(EmitContext& ctx); | 147 | void EmitGetCarryFromOp(EmitContext& ctx); |
| 147 | void EmitGetOverflowFromOp(EmitContext& ctx); | 148 | void EmitGetOverflowFromOp(EmitContext& ctx); |
| 149 | void EmitGetSparseFromOp(EmitContext& ctx); | ||
| 148 | Id EmitFPAbs16(EmitContext& ctx, Id value); | 150 | Id EmitFPAbs16(EmitContext& ctx, Id value); |
| 149 | Id EmitFPAbs32(EmitContext& ctx, Id value); | 151 | Id EmitFPAbs32(EmitContext& ctx, Id value); |
| 150 | Id EmitFPAbs64(EmitContext& ctx, Id value); | 152 | Id EmitFPAbs64(EmitContext& ctx, Id value); |
| @@ -291,5 +293,33 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value); | |||
| 291 | Id EmitConvertF32F16(EmitContext& ctx, Id value); | 293 | Id EmitConvertF32F16(EmitContext& ctx, Id value); |
| 292 | Id EmitConvertF32F64(EmitContext& ctx, Id value); | 294 | Id EmitConvertF32F64(EmitContext& ctx, Id value); |
| 293 | Id EmitConvertF64F32(EmitContext& ctx, Id value); | 295 | Id EmitConvertF64F32(EmitContext& ctx, Id value); |
| 296 | Id EmitConvertF16S32(EmitContext& ctx, Id value); | ||
| 297 | Id EmitConvertF16S64(EmitContext& ctx, Id value); | ||
| 298 | Id EmitConvertF16U32(EmitContext& ctx, Id value); | ||
| 299 | Id EmitConvertF16U64(EmitContext& ctx, Id value); | ||
| 300 | Id EmitConvertF32S32(EmitContext& ctx, Id value); | ||
| 301 | Id EmitConvertF32S64(EmitContext& ctx, Id value); | ||
| 302 | Id EmitConvertF32U32(EmitContext& ctx, Id value); | ||
| 303 | Id EmitConvertF32U64(EmitContext& ctx, Id value); | ||
| 304 | Id EmitConvertF64S32(EmitContext& ctx, Id value); | ||
| 305 | Id EmitConvertF64S64(EmitContext& ctx, Id value); | ||
| 306 | Id EmitConvertF64U32(EmitContext& ctx, Id value); | ||
| 307 | Id EmitConvertF64U64(EmitContext& ctx, Id value); | ||
| 308 | Id EmitBindlessImageSampleImplicitLod(EmitContext&); | ||
| 309 | Id EmitBindlessImageSampleExplicitLod(EmitContext&); | ||
| 310 | Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); | ||
| 311 | Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); | ||
| 312 | Id EmitBoundImageSampleImplicitLod(EmitContext&); | ||
| 313 | Id EmitBoundImageSampleExplicitLod(EmitContext&); | ||
| 314 | Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); | ||
| 315 | Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); | ||
| 316 | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 317 | Id bias_lc, Id offset); | ||
| 318 | Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 319 | Id lod_lc, Id offset); | ||
| 320 | Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 321 | Id coords, Id dref, Id bias_lc, Id offset); | ||
| 322 | Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 323 | Id coords, Id dref, Id lod_lc, Id offset); | ||
| 294 | 324 | ||
| 295 | } // namespace Shader::Backend::SPIRV | 325 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index edcc2a1cc..2aff673aa 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp | |||
| @@ -102,4 +102,52 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) { | |||
| 102 | return ctx.OpFConvert(ctx.F64[1], value); | 102 | return ctx.OpFConvert(ctx.F64[1], value); |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | Id EmitConvertF16S32(EmitContext& ctx, Id value) { | ||
| 106 | return ctx.OpConvertSToF(ctx.F16[1], value); | ||
| 107 | } | ||
| 108 | |||
| 109 | Id EmitConvertF16S64(EmitContext& ctx, Id value) { | ||
| 110 | return ctx.OpConvertSToF(ctx.F16[1], value); | ||
| 111 | } | ||
| 112 | |||
| 113 | Id EmitConvertF16U32(EmitContext& ctx, Id value) { | ||
| 114 | return ctx.OpConvertUToF(ctx.F16[1], value); | ||
| 115 | } | ||
| 116 | |||
| 117 | Id EmitConvertF16U64(EmitContext& ctx, Id value) { | ||
| 118 | return ctx.OpConvertUToF(ctx.F16[1], value); | ||
| 119 | } | ||
| 120 | |||
| 121 | Id EmitConvertF32S32(EmitContext& ctx, Id value) { | ||
| 122 | return ctx.OpConvertSToF(ctx.F32[1], value); | ||
| 123 | } | ||
| 124 | |||
| 125 | Id EmitConvertF32S64(EmitContext& ctx, Id value) { | ||
| 126 | return ctx.OpConvertSToF(ctx.F32[1], value); | ||
| 127 | } | ||
| 128 | |||
| 129 | Id EmitConvertF32U32(EmitContext& ctx, Id value) { | ||
| 130 | return ctx.OpConvertUToF(ctx.F32[1], value); | ||
| 131 | } | ||
| 132 | |||
| 133 | Id EmitConvertF32U64(EmitContext& ctx, Id value) { | ||
| 134 | return ctx.OpConvertUToF(ctx.F32[1], value); | ||
| 135 | } | ||
| 136 | |||
| 137 | Id EmitConvertF64S32(EmitContext& ctx, Id value) { | ||
| 138 | return ctx.OpConvertSToF(ctx.F64[1], value); | ||
| 139 | } | ||
| 140 | |||
| 141 | Id EmitConvertF64S64(EmitContext& ctx, Id value) { | ||
| 142 | return ctx.OpConvertSToF(ctx.F64[1], value); | ||
| 143 | } | ||
| 144 | |||
| 145 | Id EmitConvertF64U32(EmitContext& ctx, Id value) { | ||
| 146 | return ctx.OpConvertUToF(ctx.F64[1], value); | ||
| 147 | } | ||
| 148 | |||
| 149 | Id EmitConvertF64U64(EmitContext& ctx, Id value) { | ||
| 150 | return ctx.OpConvertUToF(ctx.F64[1], value); | ||
| 151 | } | ||
| 152 | |||
| 105 | } // namespace Shader::Backend::SPIRV | 153 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp new file mode 100644 index 000000000..5f4783c95 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -0,0 +1,146 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <boost/container/static_vector.hpp> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::SPIRV { | ||
| 11 | namespace { | ||
| 12 | class ImageOperands { | ||
| 13 | public: | ||
| 14 | explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, | ||
| 15 | Id lod, Id offset) { | ||
| 16 | if (has_bias) { | ||
| 17 | const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; | ||
| 18 | Add(spv::ImageOperandsMask::Bias, bias); | ||
| 19 | } | ||
| 20 | if (has_lod) { | ||
| 21 | const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; | ||
| 22 | Add(spv::ImageOperandsMask::Lod, lod_value); | ||
| 23 | } | ||
| 24 | if (Sirit::ValidId(offset)) { | ||
| 25 | Add(spv::ImageOperandsMask::Offset, offset); | ||
| 26 | } | ||
| 27 | if (has_lod_clamp) { | ||
| 28 | const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod}; | ||
| 29 | Add(spv::ImageOperandsMask::MinLod, lod_clamp); | ||
| 30 | } | ||
| 31 | } | ||
| 32 | |||
| 33 | void Add(spv::ImageOperandsMask new_mask, Id value) { | ||
| 34 | mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | | ||
| 35 | static_cast<unsigned>(new_mask)); | ||
| 36 | operands.push_back(value); | ||
| 37 | } | ||
| 38 | |||
| 39 | std::span<const Id> Span() const noexcept { | ||
| 40 | return std::span{operands.data(), operands.size()}; | ||
| 41 | } | ||
| 42 | |||
| 43 | spv::ImageOperandsMask Mask() const noexcept { | ||
| 44 | return mask; | ||
| 45 | } | ||
| 46 | |||
| 47 | private: | ||
| 48 | boost::container::static_vector<Id, 3> operands; | ||
| 49 | spv::ImageOperandsMask mask{}; | ||
| 50 | }; | ||
| 51 | |||
| 52 | Id Texture(EmitContext& ctx, const IR::Value& index) { | ||
| 53 | if (index.IsImmediate()) { | ||
| 54 | const TextureDefinition def{ctx.textures.at(index.U32())}; | ||
| 55 | return ctx.OpLoad(def.type, def.id); | ||
| 56 | } | ||
| 57 | throw NotImplementedException("Indirect texture sample"); | ||
| 58 | } | ||
| 59 | |||
| 60 | template <typename MethodPtrType, typename... Args> | ||
| 61 | Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst, | ||
| 62 | Id result_type, Args&&... args) { | ||
| 63 | IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; | ||
| 64 | if (!sparse) { | ||
| 65 | return (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...); | ||
| 66 | } | ||
| 67 | const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)}; | ||
| 68 | const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)}; | ||
| 69 | const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)}; | ||
| 70 | sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code)); | ||
| 71 | sparse->Invalidate(); | ||
| 72 | return ctx.OpCompositeExtract(result_type, sample, 1U); | ||
| 73 | } | ||
| 74 | } // Anonymous namespace | ||
| 75 | |||
| 76 | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { | ||
| 77 | throw LogicError("Unreachable instruction"); | ||
| 78 | } | ||
| 79 | |||
| 80 | Id EmitBindlessImageSampleExplicitLod(EmitContext&) { | ||
| 81 | throw LogicError("Unreachable instruction"); | ||
| 82 | } | ||
| 83 | |||
| 84 | Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { | ||
| 85 | throw LogicError("Unreachable instruction"); | ||
| 86 | } | ||
| 87 | |||
| 88 | Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { | ||
| 89 | throw LogicError("Unreachable instruction"); | ||
| 90 | } | ||
| 91 | |||
| 92 | Id EmitBoundImageSampleImplicitLod(EmitContext&) { | ||
| 93 | throw LogicError("Unreachable instruction"); | ||
| 94 | } | ||
| 95 | |||
| 96 | Id EmitBoundImageSampleExplicitLod(EmitContext&) { | ||
| 97 | throw LogicError("Unreachable instruction"); | ||
| 98 | } | ||
| 99 | |||
| 100 | Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) { | ||
| 101 | throw LogicError("Unreachable instruction"); | ||
| 102 | } | ||
| 103 | |||
| 104 | Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) { | ||
| 105 | throw LogicError("Unreachable instruction"); | ||
| 106 | } | ||
| 107 | |||
| 108 | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 109 | Id bias_lc, Id offset) { | ||
| 110 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 111 | const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, | ||
| 112 | offset); | ||
| 113 | return Emit(&EmitContext::OpImageSparseSampleImplicitLod, | ||
| 114 | &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), | ||
| 115 | coords, operands.Mask(), operands.Span()); | ||
| 116 | } | ||
| 117 | |||
| 118 | Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 119 | Id lod_lc, Id offset) { | ||
| 120 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 121 | const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); | ||
| 122 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, | ||
| 123 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), | ||
| 124 | coords, operands.Mask(), operands.Span()); | ||
| 125 | } | ||
| 126 | |||
| 127 | Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 128 | Id coords, Id dref, Id bias_lc, Id offset) { | ||
| 129 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 130 | const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, | ||
| 131 | offset); | ||
| 132 | return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, | ||
| 133 | &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], | ||
| 134 | Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); | ||
| 135 | } | ||
| 136 | |||
| 137 | Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 138 | Id coords, Id dref, Id lod_lc, Id offset) { | ||
| 139 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 140 | const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); | ||
| 141 | return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, | ||
| 142 | &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], | ||
| 143 | Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); | ||
| 144 | } | ||
| 145 | |||
| 146 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 808c1b401..7d3efc741 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp | |||
| @@ -154,8 +154,22 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va | |||
| 154 | ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); | 154 | ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); |
| 155 | } | 155 | } |
| 156 | 156 | ||
| 157 | void EmitWriteStorage128(EmitContext&) { | 157 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, |
| 158 | throw NotImplementedException("SPIR-V Instruction"); | 158 | Id value) { |
| 159 | if (!binding.IsImmediate()) { | ||
| 160 | throw NotImplementedException("Dynamic storage buffer indexing"); | ||
| 161 | } | ||
| 162 | // TODO: Support reinterpreting bindings, guaranteed to be aligned | ||
| 163 | const Id ssbo{ctx.ssbos[binding.U32()]}; | ||
| 164 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 165 | for (u32 element = 0; element < 4; ++element) { | ||
| 166 | Id index = base_index; | ||
| 167 | if (element > 0) { | ||
| 168 | index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); | ||
| 169 | } | ||
| 170 | const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; | ||
| 171 | ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, element)); | ||
| 172 | } | ||
| 159 | } | 173 | } |
| 160 | 174 | ||
| 161 | } // namespace Shader::Backend::SPIRV | 175 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0ba681fb9..0fcb68050 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h | |||
| @@ -12,6 +12,8 @@ public: | |||
| 12 | 12 | ||
| 13 | [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; | 13 | [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; |
| 14 | 14 | ||
| 15 | [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; | ||
| 16 | |||
| 15 | [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0; | 17 | [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0; |
| 16 | }; | 18 | }; |
| 17 | 19 | ||
diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index 5127523f9..21700c72b 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp | |||
| @@ -39,6 +39,10 @@ u64 FileEnvironment::ReadInstruction(u32 offset) { | |||
| 39 | return data[offset / 8]; | 39 | return data[offset / 8]; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | u32 FileEnvironment::TextureBoundBuffer() { | ||
| 43 | throw NotImplementedException("Texture bound buffer serialization"); | ||
| 44 | } | ||
| 45 | |||
| 42 | std::array<u32, 3> FileEnvironment::WorkgroupSize() { | 46 | std::array<u32, 3> FileEnvironment::WorkgroupSize() { |
| 43 | return {1, 1, 1}; | 47 | return {1, 1, 1}; |
| 44 | } | 48 | } |
diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index b8c4bbadd..62302bc8e 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | #include <vector> | 3 | #include <vector> |
| 4 | 4 | ||
| 5 | #include "common/common_types.h" | 5 | #include "common/common_types.h" |
| 6 | #include "environment.h" | 6 | #include "shader_recompiler/environment.h" |
| 7 | 7 | ||
| 8 | namespace Shader { | 8 | namespace Shader { |
| 9 | 9 | ||
| @@ -14,6 +14,8 @@ public: | |||
| 14 | 14 | ||
| 15 | u64 ReadInstruction(u32 offset) override; | 15 | u64 ReadInstruction(u32 offset) override; |
| 16 | 16 | ||
| 17 | u32 TextureBoundBuffer() override; | ||
| 18 | |||
| 17 | std::array<u32, 3> WorkgroupSize() override; | 19 | std::array<u32, 3> WorkgroupSize() override; |
| 18 | 20 | ||
| 19 | private: | 21 | private: |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f38b46bac..ae3354c66 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -7,11 +7,24 @@ | |||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | 7 | #include "shader_recompiler/frontend/ir/value.h" |
| 8 | 8 | ||
| 9 | namespace Shader::IR { | 9 | namespace Shader::IR { |
| 10 | 10 | namespace { | |
| 11 | [[noreturn]] static void ThrowInvalidType(Type type) { | 11 | [[noreturn]] void ThrowInvalidType(Type type) { |
| 12 | throw InvalidArgument("Invalid type {}", type); | 12 | throw InvalidArgument("Invalid type {}", type); |
| 13 | } | 13 | } |
| 14 | 14 | ||
| 15 | Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { | ||
| 16 | if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { | ||
| 17 | return ir.CompositeConstruct(bias_lod, lod_clamp); | ||
| 18 | } else if (!bias_lod.IsEmpty()) { | ||
| 19 | return bias_lod; | ||
| 20 | } else if (!lod_clamp.IsEmpty()) { | ||
| 21 | return lod_clamp; | ||
| 22 | } else { | ||
| 23 | return Value{}; | ||
| 24 | } | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
| 15 | U1 IREmitter::Imm1(bool value) const { | 28 | U1 IREmitter::Imm1(bool value) const { |
| 16 | return U1{Value{value}}; | 29 | return U1{Value{value}}; |
| 17 | } | 30 | } |
| @@ -261,6 +274,10 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { | |||
| 261 | return Inst<U1>(Opcode::GetOverflowFromOp, op); | 274 | return Inst<U1>(Opcode::GetOverflowFromOp, op); |
| 262 | } | 275 | } |
| 263 | 276 | ||
| 277 | U1 IREmitter::GetSparseFromOp(const Value& op) { | ||
| 278 | return Inst<U1>(Opcode::GetSparseFromOp, op); | ||
| 279 | } | ||
| 280 | |||
| 264 | F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { | 281 | F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { |
| 265 | if (a.Type() != a.Type()) { | 282 | if (a.Type() != a.Type()) { |
| 266 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | 283 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); |
| @@ -1035,6 +1052,82 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v | |||
| 1035 | } | 1052 | } |
| 1036 | } | 1053 | } |
| 1037 | 1054 | ||
| 1055 | F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) { | ||
| 1056 | switch (bitsize) { | ||
| 1057 | case 16: | ||
| 1058 | switch (value.Type()) { | ||
| 1059 | case Type::U32: | ||
| 1060 | return Inst<F16>(Opcode::ConvertF16S32, value); | ||
| 1061 | case Type::U64: | ||
| 1062 | return Inst<F16>(Opcode::ConvertF16S64, value); | ||
| 1063 | default: | ||
| 1064 | ThrowInvalidType(value.Type()); | ||
| 1065 | } | ||
| 1066 | case 32: | ||
| 1067 | switch (value.Type()) { | ||
| 1068 | case Type::U32: | ||
| 1069 | return Inst<F32>(Opcode::ConvertF32S32, value); | ||
| 1070 | case Type::U64: | ||
| 1071 | return Inst<F32>(Opcode::ConvertF32S64, value); | ||
| 1072 | default: | ||
| 1073 | ThrowInvalidType(value.Type()); | ||
| 1074 | } | ||
| 1075 | case 64: | ||
| 1076 | switch (value.Type()) { | ||
| 1077 | case Type::U32: | ||
| 1078 | return Inst<F16>(Opcode::ConvertF64S32, value); | ||
| 1079 | case Type::U64: | ||
| 1080 | return Inst<F16>(Opcode::ConvertF64S64, value); | ||
| 1081 | default: | ||
| 1082 | ThrowInvalidType(value.Type()); | ||
| 1083 | } | ||
| 1084 | default: | ||
| 1085 | throw InvalidArgument("Invalid destination bitsize {}", bitsize); | ||
| 1086 | } | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) { | ||
| 1090 | switch (bitsize) { | ||
| 1091 | case 16: | ||
| 1092 | switch (value.Type()) { | ||
| 1093 | case Type::U32: | ||
| 1094 | return Inst<F16>(Opcode::ConvertF16U32, value); | ||
| 1095 | case Type::U64: | ||
| 1096 | return Inst<F16>(Opcode::ConvertF16U64, value); | ||
| 1097 | default: | ||
| 1098 | ThrowInvalidType(value.Type()); | ||
| 1099 | } | ||
| 1100 | case 32: | ||
| 1101 | switch (value.Type()) { | ||
| 1102 | case Type::U32: | ||
| 1103 | return Inst<F32>(Opcode::ConvertF32U32, value); | ||
| 1104 | case Type::U64: | ||
| 1105 | return Inst<F32>(Opcode::ConvertF32U64, value); | ||
| 1106 | default: | ||
| 1107 | ThrowInvalidType(value.Type()); | ||
| 1108 | } | ||
| 1109 | case 64: | ||
| 1110 | switch (value.Type()) { | ||
| 1111 | case Type::U32: | ||
| 1112 | return Inst<F16>(Opcode::ConvertF64U32, value); | ||
| 1113 | case Type::U64: | ||
| 1114 | return Inst<F16>(Opcode::ConvertF64U64, value); | ||
| 1115 | default: | ||
| 1116 | ThrowInvalidType(value.Type()); | ||
| 1117 | } | ||
| 1118 | default: | ||
| 1119 | throw InvalidArgument("Invalid destination bitsize {}", bitsize); | ||
| 1120 | } | ||
| 1121 | } | ||
| 1122 | |||
| 1123 | F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) { | ||
| 1124 | if (is_signed) { | ||
| 1125 | return ConvertSToF(bitsize, value); | ||
| 1126 | } else { | ||
| 1127 | return ConvertUToF(bitsize, value); | ||
| 1128 | } | ||
| 1129 | } | ||
| 1130 | |||
| 1038 | U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { | 1131 | U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { |
| 1039 | switch (result_bitsize) { | 1132 | switch (result_bitsize) { |
| 1040 | case 32: | 1133 | case 32: |
| @@ -1107,4 +1200,40 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { | |||
| 1107 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); | 1200 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); |
| 1108 | } | 1201 | } |
| 1109 | 1202 | ||
| 1203 | Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, | ||
| 1204 | const Value& offset, const F32& lod_clamp, | ||
| 1205 | TextureInstInfo info) { | ||
| 1206 | const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; | ||
| 1207 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod | ||
| 1208 | : Opcode::BindlessImageSampleImplicitLod}; | ||
| 1209 | return Inst(op, Flags{info}, handle, coords, bias_lc, offset); | ||
| 1210 | } | ||
| 1211 | |||
| 1212 | Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, | ||
| 1213 | const Value& offset, const F32& lod_clamp, | ||
| 1214 | TextureInstInfo info) { | ||
| 1215 | const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; | ||
| 1216 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod | ||
| 1217 | : Opcode::BindlessImageSampleExplicitLod}; | ||
| 1218 | return Inst(op, Flags{info}, handle, coords, lod_lc, offset); | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, | ||
| 1222 | const F32& bias, const Value& offset, | ||
| 1223 | const F32& lod_clamp, TextureInstInfo info) { | ||
| 1224 | const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; | ||
| 1225 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod | ||
| 1226 | : Opcode::BindlessImageSampleDrefImplicitLod}; | ||
| 1227 | return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset); | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, | ||
| 1231 | const F32& lod, const Value& offset, const F32& lod_clamp, | ||
| 1232 | TextureInstInfo info) { | ||
| 1233 | const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; | ||
| 1234 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod | ||
| 1235 | : Opcode::BindlessImageSampleDrefExplicitLod}; | ||
| 1236 | return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset); | ||
| 1237 | } | ||
| 1238 | |||
| 1110 | } // namespace Shader::IR | 1239 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 6e29bf0e2..cb2a7710a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -91,6 +91,7 @@ public: | |||
| 91 | [[nodiscard]] U1 GetSignFromOp(const Value& op); | 91 | [[nodiscard]] U1 GetSignFromOp(const Value& op); |
| 92 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); | 92 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); |
| 93 | [[nodiscard]] U1 GetOverflowFromOp(const Value& op); | 93 | [[nodiscard]] U1 GetOverflowFromOp(const Value& op); |
| 94 | [[nodiscard]] U1 GetSparseFromOp(const Value& op); | ||
| 94 | 95 | ||
| 95 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); | 96 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); |
| 96 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); | 97 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); |
| @@ -159,7 +160,7 @@ public: | |||
| 159 | [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, | 160 | [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, |
| 160 | const U32& count); | 161 | const U32& count); |
| 161 | [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, | 162 | [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, |
| 162 | bool is_signed); | 163 | bool is_signed = false); |
| 163 | [[nodiscard]] U32 BitReverse(const U32& value); | 164 | [[nodiscard]] U32 BitReverse(const U32& value); |
| 164 | [[nodiscard]] U32 BitCount(const U32& value); | 165 | [[nodiscard]] U32 BitCount(const U32& value); |
| 165 | [[nodiscard]] U32 BitwiseNot(const U32& value); | 166 | [[nodiscard]] U32 BitwiseNot(const U32& value); |
| @@ -186,10 +187,28 @@ public: | |||
| 186 | [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); | 187 | [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); |
| 187 | [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); | 188 | [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); |
| 188 | [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); | 189 | [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); |
| 190 | [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); | ||
| 191 | [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); | ||
| 192 | [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); | ||
| 189 | 193 | ||
| 190 | [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); | 194 | [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); |
| 191 | [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); | 195 | [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); |
| 192 | 196 | ||
| 197 | [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, | ||
| 198 | const F32& bias, const Value& offset, | ||
| 199 | const F32& lod_clamp, TextureInstInfo info); | ||
| 200 | [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, | ||
| 201 | const F32& lod, const Value& offset, | ||
| 202 | const F32& lod_clamp, TextureInstInfo info); | ||
| 203 | [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, | ||
| 204 | const F32& dref, const F32& bias, | ||
| 205 | const Value& offset, const F32& lod_clamp, | ||
| 206 | TextureInstInfo info); | ||
| 207 | [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, | ||
| 208 | const F32& dref, const F32& lod, | ||
| 209 | const Value& offset, const F32& lod_clamp, | ||
| 210 | TextureInstInfo info); | ||
| 211 | |||
| 193 | private: | 212 | private: |
| 194 | IR::Block::iterator insertion_point; | 213 | IR::Block::iterator insertion_point; |
| 195 | 214 | ||
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index d6a9be87d..88e186f21 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp | |||
| @@ -10,26 +10,27 @@ | |||
| 10 | #include "shader_recompiler/frontend/ir/type.h" | 10 | #include "shader_recompiler/frontend/ir/type.h" |
| 11 | 11 | ||
| 12 | namespace Shader::IR { | 12 | namespace Shader::IR { |
| 13 | 13 | namespace { | |
| 14 | static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { | 14 | void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { |
| 15 | if (inst && inst->Opcode() != opcode) { | 15 | if (inst && inst->Opcode() != opcode) { |
| 16 | throw LogicError("Invalid pseudo-instruction"); | 16 | throw LogicError("Invalid pseudo-instruction"); |
| 17 | } | 17 | } |
| 18 | } | 18 | } |
| 19 | 19 | ||
| 20 | static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { | 20 | void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { |
| 21 | if (dest_inst) { | 21 | if (dest_inst) { |
| 22 | throw LogicError("Only one of each type of pseudo-op allowed"); | 22 | throw LogicError("Only one of each type of pseudo-op allowed"); |
| 23 | } | 23 | } |
| 24 | dest_inst = pseudo_inst; | 24 | dest_inst = pseudo_inst; |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { | 27 | void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { |
| 28 | if (inst->Opcode() != expected_opcode) { | 28 | if (inst->Opcode() != expected_opcode) { |
| 29 | throw LogicError("Undoing use of invalid pseudo-op"); | 29 | throw LogicError("Undoing use of invalid pseudo-op"); |
| 30 | } | 30 | } |
| 31 | inst = nullptr; | 31 | inst = nullptr; |
| 32 | } | 32 | } |
| 33 | } // Anonymous namespace | ||
| 33 | 34 | ||
| 34 | Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { | 35 | Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { |
| 35 | if (op == Opcode::Phi) { | 36 | if (op == Opcode::Phi) { |
| @@ -82,6 +83,7 @@ bool Inst::IsPseudoInstruction() const noexcept { | |||
| 82 | case Opcode::GetSignFromOp: | 83 | case Opcode::GetSignFromOp: |
| 83 | case Opcode::GetCarryFromOp: | 84 | case Opcode::GetCarryFromOp: |
| 84 | case Opcode::GetOverflowFromOp: | 85 | case Opcode::GetOverflowFromOp: |
| 86 | case Opcode::GetSparseFromOp: | ||
| 85 | return true; | 87 | return true; |
| 86 | default: | 88 | default: |
| 87 | return false; | 89 | return false; |
| @@ -96,25 +98,26 @@ bool Inst::AreAllArgsImmediates() const { | |||
| 96 | [](const IR::Value& value) { return value.IsImmediate(); }); | 98 | [](const IR::Value& value) { return value.IsImmediate(); }); |
| 97 | } | 99 | } |
| 98 | 100 | ||
| 99 | bool Inst::HasAssociatedPseudoOperation() const noexcept { | ||
| 100 | return zero_inst || sign_inst || carry_inst || overflow_inst; | ||
| 101 | } | ||
| 102 | |||
| 103 | Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { | 101 | Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { |
| 104 | // This is faster than doing a search through the block. | 102 | if (!associated_insts) { |
| 103 | return nullptr; | ||
| 104 | } | ||
| 105 | switch (opcode) { | 105 | switch (opcode) { |
| 106 | case Opcode::GetZeroFromOp: | 106 | case Opcode::GetZeroFromOp: |
| 107 | CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp); | 107 | CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp); |
| 108 | return zero_inst; | 108 | return associated_insts->zero_inst; |
| 109 | case Opcode::GetSignFromOp: | 109 | case Opcode::GetSignFromOp: |
| 110 | CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp); | 110 | CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp); |
| 111 | return sign_inst; | 111 | return associated_insts->sign_inst; |
| 112 | case Opcode::GetCarryFromOp: | 112 | case Opcode::GetCarryFromOp: |
| 113 | CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp); | 113 | CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp); |
| 114 | return carry_inst; | 114 | return associated_insts->carry_inst; |
| 115 | case Opcode::GetOverflowFromOp: | 115 | case Opcode::GetOverflowFromOp: |
| 116 | CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); | 116 | CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp); |
| 117 | return overflow_inst; | 117 | return associated_insts->overflow_inst; |
| 118 | case Opcode::GetSparseFromOp: | ||
| 119 | CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); | ||
| 120 | return associated_insts->sparse_inst; | ||
| 118 | default: | 121 | default: |
| 119 | throw InvalidArgument("{} is not a pseudo-instruction", opcode); | 122 | throw InvalidArgument("{} is not a pseudo-instruction", opcode); |
| 120 | } | 123 | } |
| @@ -220,22 +223,37 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { | |||
| 220 | op = opcode; | 223 | op = opcode; |
| 221 | } | 224 | } |
| 222 | 225 | ||
| 226 | void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) { | ||
| 227 | if (!associated_insts) { | ||
| 228 | associated_insts = std::make_unique<AssociatedInsts>(); | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 223 | void Inst::Use(const Value& value) { | 232 | void Inst::Use(const Value& value) { |
| 224 | Inst* const inst{value.Inst()}; | 233 | Inst* const inst{value.Inst()}; |
| 225 | ++inst->use_count; | 234 | ++inst->use_count; |
| 226 | 235 | ||
| 236 | std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; | ||
| 227 | switch (op) { | 237 | switch (op) { |
| 228 | case Opcode::GetZeroFromOp: | 238 | case Opcode::GetZeroFromOp: |
| 229 | SetPseudoInstruction(inst->zero_inst, this); | 239 | AllocAssociatedInsts(assoc_inst); |
| 240 | SetPseudoInstruction(assoc_inst->zero_inst, this); | ||
| 230 | break; | 241 | break; |
| 231 | case Opcode::GetSignFromOp: | 242 | case Opcode::GetSignFromOp: |
| 232 | SetPseudoInstruction(inst->sign_inst, this); | 243 | AllocAssociatedInsts(assoc_inst); |
| 244 | SetPseudoInstruction(assoc_inst->sign_inst, this); | ||
| 233 | break; | 245 | break; |
| 234 | case Opcode::GetCarryFromOp: | 246 | case Opcode::GetCarryFromOp: |
| 235 | SetPseudoInstruction(inst->carry_inst, this); | 247 | AllocAssociatedInsts(assoc_inst); |
| 248 | SetPseudoInstruction(assoc_inst->carry_inst, this); | ||
| 236 | break; | 249 | break; |
| 237 | case Opcode::GetOverflowFromOp: | 250 | case Opcode::GetOverflowFromOp: |
| 238 | SetPseudoInstruction(inst->overflow_inst, this); | 251 | AllocAssociatedInsts(assoc_inst); |
| 252 | SetPseudoInstruction(assoc_inst->overflow_inst, this); | ||
| 253 | break; | ||
| 254 | case Opcode::GetSparseFromOp: | ||
| 255 | AllocAssociatedInsts(assoc_inst); | ||
| 256 | SetPseudoInstruction(assoc_inst->sparse_inst, this); | ||
| 239 | break; | 257 | break; |
| 240 | default: | 258 | default: |
| 241 | break; | 259 | break; |
| @@ -246,18 +264,23 @@ void Inst::UndoUse(const Value& value) { | |||
| 246 | Inst* const inst{value.Inst()}; | 264 | Inst* const inst{value.Inst()}; |
| 247 | --inst->use_count; | 265 | --inst->use_count; |
| 248 | 266 | ||
| 267 | std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; | ||
| 249 | switch (op) { | 268 | switch (op) { |
| 250 | case Opcode::GetZeroFromOp: | 269 | case Opcode::GetZeroFromOp: |
| 251 | RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp); | 270 | AllocAssociatedInsts(assoc_inst); |
| 271 | RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp); | ||
| 252 | break; | 272 | break; |
| 253 | case Opcode::GetSignFromOp: | 273 | case Opcode::GetSignFromOp: |
| 254 | RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp); | 274 | AllocAssociatedInsts(assoc_inst); |
| 275 | RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp); | ||
| 255 | break; | 276 | break; |
| 256 | case Opcode::GetCarryFromOp: | 277 | case Opcode::GetCarryFromOp: |
| 257 | RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp); | 278 | AllocAssociatedInsts(assoc_inst); |
| 279 | RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp); | ||
| 258 | break; | 280 | break; |
| 259 | case Opcode::GetOverflowFromOp: | 281 | case Opcode::GetOverflowFromOp: |
| 260 | RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp); | 282 | AllocAssociatedInsts(assoc_inst); |
| 283 | RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); | ||
| 261 | break; | 284 | break; |
| 262 | default: | 285 | default: |
| 263 | break; | 286 | break; |
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 321393dd7..d5336c438 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h | |||
| @@ -22,7 +22,7 @@ namespace Shader::IR { | |||
| 22 | 22 | ||
| 23 | class Block; | 23 | class Block; |
| 24 | 24 | ||
| 25 | constexpr size_t MAX_ARG_COUNT = 4; | 25 | struct AssociatedInsts; |
| 26 | 26 | ||
| 27 | class Inst : public boost::intrusive::list_base_hook<> { | 27 | class Inst : public boost::intrusive::list_base_hook<> { |
| 28 | public: | 28 | public: |
| @@ -50,6 +50,11 @@ public: | |||
| 50 | return op; | 50 | return op; |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | /// Determines if there is a pseudo-operation associated with this instruction. | ||
| 54 | [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { | ||
| 55 | return associated_insts != nullptr; | ||
| 56 | } | ||
| 57 | |||
| 53 | /// Determines whether or not this instruction may have side effects. | 58 | /// Determines whether or not this instruction may have side effects. |
| 54 | [[nodiscard]] bool MayHaveSideEffects() const noexcept; | 59 | [[nodiscard]] bool MayHaveSideEffects() const noexcept; |
| 55 | 60 | ||
| @@ -60,8 +65,6 @@ public: | |||
| 60 | /// Determines if all arguments of this instruction are immediates. | 65 | /// Determines if all arguments of this instruction are immediates. |
| 61 | [[nodiscard]] bool AreAllArgsImmediates() const; | 66 | [[nodiscard]] bool AreAllArgsImmediates() const; |
| 62 | 67 | ||
| 63 | /// Determines if there is a pseudo-operation associated with this instruction. | ||
| 64 | [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; | ||
| 65 | /// Gets a pseudo-operation associated with this instruction | 68 | /// Gets a pseudo-operation associated with this instruction |
| 66 | [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); | 69 | [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); |
| 67 | 70 | ||
| @@ -122,14 +125,21 @@ private: | |||
| 122 | u32 definition{}; | 125 | u32 definition{}; |
| 123 | union { | 126 | union { |
| 124 | NonTriviallyDummy dummy{}; | 127 | NonTriviallyDummy dummy{}; |
| 125 | std::array<Value, MAX_ARG_COUNT> args; | ||
| 126 | std::vector<std::pair<Block*, Value>> phi_args; | 128 | std::vector<std::pair<Block*, Value>> phi_args; |
| 129 | std::array<Value, 5> args; | ||
| 130 | }; | ||
| 131 | std::unique_ptr<AssociatedInsts> associated_insts; | ||
| 132 | }; | ||
| 133 | static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); | ||
| 134 | |||
| 135 | struct AssociatedInsts { | ||
| 136 | union { | ||
| 137 | Inst* sparse_inst; | ||
| 138 | Inst* zero_inst{}; | ||
| 127 | }; | 139 | }; |
| 128 | Inst* zero_inst{}; | ||
| 129 | Inst* sign_inst{}; | 140 | Inst* sign_inst{}; |
| 130 | Inst* carry_inst{}; | 141 | Inst* carry_inst{}; |
| 131 | Inst* overflow_inst{}; | 142 | Inst* overflow_inst{}; |
| 132 | }; | 143 | }; |
| 133 | static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size"); | ||
| 134 | 144 | ||
| 135 | } // namespace Shader::IR | 145 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 44652eae7..ad07700ae 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h | |||
| @@ -4,7 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/bit_field.h" | ||
| 7 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "shader_recompiler/shader_info.h" | ||
| 8 | 10 | ||
| 9 | namespace Shader::IR { | 11 | namespace Shader::IR { |
| 10 | 12 | ||
| @@ -30,4 +32,12 @@ struct FpControl { | |||
| 30 | }; | 32 | }; |
| 31 | static_assert(sizeof(FpControl) <= sizeof(u32)); | 33 | static_assert(sizeof(FpControl) <= sizeof(u32)); |
| 32 | 34 | ||
| 35 | union TextureInstInfo { | ||
| 36 | u32 raw; | ||
| 37 | BitField<0, 8, TextureType> type; | ||
| 38 | BitField<8, 1, u32> has_bias; | ||
| 39 | BitField<16, 1, u32> has_lod_clamp; | ||
| 40 | }; | ||
| 41 | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); | ||
| 42 | |||
| 33 | } // namespace Shader::IR | 43 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 1f188411a..8492a13d5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp | |||
| @@ -14,7 +14,7 @@ namespace { | |||
| 14 | struct OpcodeMeta { | 14 | struct OpcodeMeta { |
| 15 | std::string_view name; | 15 | std::string_view name; |
| 16 | Type type; | 16 | Type type; |
| 17 | std::array<Type, 4> arg_types; | 17 | std::array<Type, 5> arg_types; |
| 18 | }; | 18 | }; |
| 19 | 19 | ||
| 20 | using enum Type; | 20 | using enum Type; |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index c4e72c84d..aa011fab1 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -2,301 +2,330 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... | 5 | // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg4 type, ... |
| 6 | OPCODE(Phi, Opaque, ) | 6 | OPCODE(Phi, Opaque, ) |
| 7 | OPCODE(Identity, Opaque, Opaque, ) | 7 | OPCODE(Identity, Opaque, Opaque, ) |
| 8 | OPCODE(Void, Void, ) | 8 | OPCODE(Void, Void, ) |
| 9 | 9 | ||
| 10 | // Control flow | 10 | // Control flow |
| 11 | OPCODE(Branch, Void, Label, ) | 11 | OPCODE(Branch, Void, Label, ) |
| 12 | OPCODE(BranchConditional, Void, U1, Label, Label, ) | 12 | OPCODE(BranchConditional, Void, U1, Label, Label, ) |
| 13 | OPCODE(LoopMerge, Void, Label, Label, ) | 13 | OPCODE(LoopMerge, Void, Label, Label, ) |
| 14 | OPCODE(SelectionMerge, Void, Label, ) | 14 | OPCODE(SelectionMerge, Void, Label, ) |
| 15 | OPCODE(Return, Void, ) | 15 | OPCODE(Return, Void, ) |
| 16 | 16 | ||
| 17 | // Context getters/setters | 17 | // Context getters/setters |
| 18 | OPCODE(GetRegister, U32, Reg, ) | 18 | OPCODE(GetRegister, U32, Reg, ) |
| 19 | OPCODE(SetRegister, Void, Reg, U32, ) | 19 | OPCODE(SetRegister, Void, Reg, U32, ) |
| 20 | OPCODE(GetPred, U1, Pred, ) | 20 | OPCODE(GetPred, U1, Pred, ) |
| 21 | OPCODE(SetPred, Void, Pred, U1, ) | 21 | OPCODE(SetPred, Void, Pred, U1, ) |
| 22 | OPCODE(GetGotoVariable, U1, U32, ) | 22 | OPCODE(GetGotoVariable, U1, U32, ) |
| 23 | OPCODE(SetGotoVariable, Void, U32, U1, ) | 23 | OPCODE(SetGotoVariable, Void, U32, U1, ) |
| 24 | OPCODE(GetCbuf, U32, U32, U32, ) | 24 | OPCODE(GetCbuf, U32, U32, U32, ) |
| 25 | OPCODE(GetAttribute, U32, Attribute, ) | 25 | OPCODE(GetAttribute, U32, Attribute, ) |
| 26 | OPCODE(SetAttribute, Void, Attribute, U32, ) | 26 | OPCODE(SetAttribute, Void, Attribute, U32, ) |
| 27 | OPCODE(GetAttributeIndexed, U32, U32, ) | 27 | OPCODE(GetAttributeIndexed, U32, U32, ) |
| 28 | OPCODE(SetAttributeIndexed, Void, U32, U32, ) | 28 | OPCODE(SetAttributeIndexed, Void, U32, U32, ) |
| 29 | OPCODE(GetZFlag, U1, Void, ) | 29 | OPCODE(GetZFlag, U1, Void, ) |
| 30 | OPCODE(GetSFlag, U1, Void, ) | 30 | OPCODE(GetSFlag, U1, Void, ) |
| 31 | OPCODE(GetCFlag, U1, Void, ) | 31 | OPCODE(GetCFlag, U1, Void, ) |
| 32 | OPCODE(GetOFlag, U1, Void, ) | 32 | OPCODE(GetOFlag, U1, Void, ) |
| 33 | OPCODE(SetZFlag, Void, U1, ) | 33 | OPCODE(SetZFlag, Void, U1, ) |
| 34 | OPCODE(SetSFlag, Void, U1, ) | 34 | OPCODE(SetSFlag, Void, U1, ) |
| 35 | OPCODE(SetCFlag, Void, U1, ) | 35 | OPCODE(SetCFlag, Void, U1, ) |
| 36 | OPCODE(SetOFlag, Void, U1, ) | 36 | OPCODE(SetOFlag, Void, U1, ) |
| 37 | OPCODE(WorkgroupId, U32x3, ) | 37 | OPCODE(WorkgroupId, U32x3, ) |
| 38 | OPCODE(LocalInvocationId, U32x3, ) | 38 | OPCODE(LocalInvocationId, U32x3, ) |
| 39 | 39 | ||
| 40 | // Undefined | 40 | // Undefined |
| 41 | OPCODE(UndefU1, U1, ) | 41 | OPCODE(UndefU1, U1, ) |
| 42 | OPCODE(UndefU8, U8, ) | 42 | OPCODE(UndefU8, U8, ) |
| 43 | OPCODE(UndefU16, U16, ) | 43 | OPCODE(UndefU16, U16, ) |
| 44 | OPCODE(UndefU32, U32, ) | 44 | OPCODE(UndefU32, U32, ) |
| 45 | OPCODE(UndefU64, U64, ) | 45 | OPCODE(UndefU64, U64, ) |
| 46 | 46 | ||
| 47 | // Memory operations | 47 | // Memory operations |
| 48 | OPCODE(LoadGlobalU8, U32, U64, ) | 48 | OPCODE(LoadGlobalU8, U32, U64, ) |
| 49 | OPCODE(LoadGlobalS8, U32, U64, ) | 49 | OPCODE(LoadGlobalS8, U32, U64, ) |
| 50 | OPCODE(LoadGlobalU16, U32, U64, ) | 50 | OPCODE(LoadGlobalU16, U32, U64, ) |
| 51 | OPCODE(LoadGlobalS16, U32, U64, ) | 51 | OPCODE(LoadGlobalS16, U32, U64, ) |
| 52 | OPCODE(LoadGlobal32, U32, U64, ) | 52 | OPCODE(LoadGlobal32, U32, U64, ) |
| 53 | OPCODE(LoadGlobal64, U32x2, U64, ) | 53 | OPCODE(LoadGlobal64, U32x2, U64, ) |
| 54 | OPCODE(LoadGlobal128, U32x4, U64, ) | 54 | OPCODE(LoadGlobal128, U32x4, U64, ) |
| 55 | OPCODE(WriteGlobalU8, Void, U64, U32, ) | 55 | OPCODE(WriteGlobalU8, Void, U64, U32, ) |
| 56 | OPCODE(WriteGlobalS8, Void, U64, U32, ) | 56 | OPCODE(WriteGlobalS8, Void, U64, U32, ) |
| 57 | OPCODE(WriteGlobalU16, Void, U64, U32, ) | 57 | OPCODE(WriteGlobalU16, Void, U64, U32, ) |
| 58 | OPCODE(WriteGlobalS16, Void, U64, U32, ) | 58 | OPCODE(WriteGlobalS16, Void, U64, U32, ) |
| 59 | OPCODE(WriteGlobal32, Void, U64, U32, ) | 59 | OPCODE(WriteGlobal32, Void, U64, U32, ) |
| 60 | OPCODE(WriteGlobal64, Void, U64, U32x2, ) | 60 | OPCODE(WriteGlobal64, Void, U64, U32x2, ) |
| 61 | OPCODE(WriteGlobal128, Void, U64, U32x4, ) | 61 | OPCODE(WriteGlobal128, Void, U64, U32x4, ) |
| 62 | 62 | ||
| 63 | // Storage buffer operations | 63 | // Storage buffer operations |
| 64 | OPCODE(LoadStorageU8, U32, U32, U32, ) | 64 | OPCODE(LoadStorageU8, U32, U32, U32, ) |
| 65 | OPCODE(LoadStorageS8, U32, U32, U32, ) | 65 | OPCODE(LoadStorageS8, U32, U32, U32, ) |
| 66 | OPCODE(LoadStorageU16, U32, U32, U32, ) | 66 | OPCODE(LoadStorageU16, U32, U32, U32, ) |
| 67 | OPCODE(LoadStorageS16, U32, U32, U32, ) | 67 | OPCODE(LoadStorageS16, U32, U32, U32, ) |
| 68 | OPCODE(LoadStorage32, U32, U32, U32, ) | 68 | OPCODE(LoadStorage32, U32, U32, U32, ) |
| 69 | OPCODE(LoadStorage64, U32x2, U32, U32, ) | 69 | OPCODE(LoadStorage64, U32x2, U32, U32, ) |
| 70 | OPCODE(LoadStorage128, U32x4, U32, U32, ) | 70 | OPCODE(LoadStorage128, U32x4, U32, U32, ) |
| 71 | OPCODE(WriteStorageU8, Void, U32, U32, U32, ) | 71 | OPCODE(WriteStorageU8, Void, U32, U32, U32, ) |
| 72 | OPCODE(WriteStorageS8, Void, U32, U32, U32, ) | 72 | OPCODE(WriteStorageS8, Void, U32, U32, U32, ) |
| 73 | OPCODE(WriteStorageU16, Void, U32, U32, U32, ) | 73 | OPCODE(WriteStorageU16, Void, U32, U32, U32, ) |
| 74 | OPCODE(WriteStorageS16, Void, U32, U32, U32, ) | 74 | OPCODE(WriteStorageS16, Void, U32, U32, U32, ) |
| 75 | OPCODE(WriteStorage32, Void, U32, U32, U32, ) | 75 | OPCODE(WriteStorage32, Void, U32, U32, U32, ) |
| 76 | OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) | 76 | OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) |
| 77 | OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) | 77 | OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) |
| 78 | 78 | ||
| 79 | // Vector utility | 79 | // Vector utility |
| 80 | OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) | 80 | OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) |
| 81 | OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) | 81 | OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) |
| 82 | OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) | 82 | OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) |
| 83 | OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) | 83 | OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) |
| 84 | OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) | 84 | OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) |
| 85 | OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) | 85 | OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) |
| 86 | OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) | 86 | OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) |
| 87 | OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) | 87 | OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) |
| 88 | OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) | 88 | OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) |
| 89 | OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) | 89 | OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) |
| 90 | OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) | 90 | OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) |
| 91 | OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) | 91 | OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) |
| 92 | OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) | 92 | OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) |
| 93 | OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) | 93 | OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) |
| 94 | OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) | 94 | OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) |
| 95 | OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) | 95 | OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) |
| 96 | OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) | 96 | OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) |
| 97 | OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) | 97 | OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) |
| 98 | OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) | 98 | OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) |
| 99 | OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) | 99 | OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) |
| 100 | OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) | 100 | OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) |
| 101 | OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) | 101 | OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) |
| 102 | OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) | 102 | OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) |
| 103 | OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) | 103 | OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) |
| 104 | OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) | 104 | OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) |
| 105 | OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) | 105 | OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) |
| 106 | OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) | 106 | OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) |
| 107 | OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) | 107 | OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) |
| 108 | OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) | 108 | OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) |
| 109 | OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) | 109 | OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) |
| 110 | OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) | 110 | OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) |
| 111 | OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) | 111 | OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) |
| 112 | OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) | 112 | OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) |
| 113 | OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) | 113 | OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) |
| 114 | OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) | 114 | OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) |
| 115 | OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) | 115 | OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) |
| 116 | 116 | ||
| 117 | // Select operations | 117 | // Select operations |
| 118 | OPCODE(SelectU1, U1, U1, U1, U1, ) | 118 | OPCODE(SelectU1, U1, U1, U1, U1, ) |
| 119 | OPCODE(SelectU8, U8, U1, U8, U8, ) | 119 | OPCODE(SelectU8, U8, U1, U8, U8, ) |
| 120 | OPCODE(SelectU16, U16, U1, U16, U16, ) | 120 | OPCODE(SelectU16, U16, U1, U16, U16, ) |
| 121 | OPCODE(SelectU32, U32, U1, U32, U32, ) | 121 | OPCODE(SelectU32, U32, U1, U32, U32, ) |
| 122 | OPCODE(SelectU64, U64, U1, U64, U64, ) | 122 | OPCODE(SelectU64, U64, U1, U64, U64, ) |
| 123 | OPCODE(SelectF16, F16, U1, F16, F16, ) | 123 | OPCODE(SelectF16, F16, U1, F16, F16, ) |
| 124 | OPCODE(SelectF32, F32, U1, F32, F32, ) | 124 | OPCODE(SelectF32, F32, U1, F32, F32, ) |
| 125 | 125 | ||
| 126 | // Bitwise conversions | 126 | // Bitwise conversions |
| 127 | OPCODE(BitCastU16F16, U16, F16, ) | 127 | OPCODE(BitCastU16F16, U16, F16, ) |
| 128 | OPCODE(BitCastU32F32, U32, F32, ) | 128 | OPCODE(BitCastU32F32, U32, F32, ) |
| 129 | OPCODE(BitCastU64F64, U64, F64, ) | 129 | OPCODE(BitCastU64F64, U64, F64, ) |
| 130 | OPCODE(BitCastF16U16, F16, U16, ) | 130 | OPCODE(BitCastF16U16, F16, U16, ) |
| 131 | OPCODE(BitCastF32U32, F32, U32, ) | 131 | OPCODE(BitCastF32U32, F32, U32, ) |
| 132 | OPCODE(BitCastF64U64, F64, U64, ) | 132 | OPCODE(BitCastF64U64, F64, U64, ) |
| 133 | OPCODE(PackUint2x32, U64, U32x2, ) | 133 | OPCODE(PackUint2x32, U64, U32x2, ) |
| 134 | OPCODE(UnpackUint2x32, U32x2, U64, ) | 134 | OPCODE(UnpackUint2x32, U32x2, U64, ) |
| 135 | OPCODE(PackFloat2x16, U32, F16x2, ) | 135 | OPCODE(PackFloat2x16, U32, F16x2, ) |
| 136 | OPCODE(UnpackFloat2x16, F16x2, U32, ) | 136 | OPCODE(UnpackFloat2x16, F16x2, U32, ) |
| 137 | OPCODE(PackHalf2x16, U32, F32x2, ) | 137 | OPCODE(PackHalf2x16, U32, F32x2, ) |
| 138 | OPCODE(UnpackHalf2x16, F32x2, U32, ) | 138 | OPCODE(UnpackHalf2x16, F32x2, U32, ) |
| 139 | OPCODE(PackDouble2x32, F64, U32x2, ) | 139 | OPCODE(PackDouble2x32, F64, U32x2, ) |
| 140 | OPCODE(UnpackDouble2x32, U32x2, F64, ) | 140 | OPCODE(UnpackDouble2x32, U32x2, F64, ) |
| 141 | 141 | ||
| 142 | // Pseudo-operation, handled specially at final emit | 142 | // Pseudo-operation, handled specially at final emit |
| 143 | OPCODE(GetZeroFromOp, U1, Opaque, ) | 143 | OPCODE(GetZeroFromOp, U1, Opaque, ) |
| 144 | OPCODE(GetSignFromOp, U1, Opaque, ) | 144 | OPCODE(GetSignFromOp, U1, Opaque, ) |
| 145 | OPCODE(GetCarryFromOp, U1, Opaque, ) | 145 | OPCODE(GetCarryFromOp, U1, Opaque, ) |
| 146 | OPCODE(GetOverflowFromOp, U1, Opaque, ) | 146 | OPCODE(GetOverflowFromOp, U1, Opaque, ) |
| 147 | OPCODE(GetSparseFromOp, U1, Opaque, ) | ||
| 147 | 148 | ||
| 148 | // Floating-point operations | 149 | // Floating-point operations |
| 149 | OPCODE(FPAbs16, F16, F16, ) | 150 | OPCODE(FPAbs16, F16, F16, ) |
| 150 | OPCODE(FPAbs32, F32, F32, ) | 151 | OPCODE(FPAbs32, F32, F32, ) |
| 151 | OPCODE(FPAbs64, F64, F64, ) | 152 | OPCODE(FPAbs64, F64, F64, ) |
| 152 | OPCODE(FPAdd16, F16, F16, F16, ) | 153 | OPCODE(FPAdd16, F16, F16, F16, ) |
| 153 | OPCODE(FPAdd32, F32, F32, F32, ) | 154 | OPCODE(FPAdd32, F32, F32, F32, ) |
| 154 | OPCODE(FPAdd64, F64, F64, F64, ) | 155 | OPCODE(FPAdd64, F64, F64, F64, ) |
| 155 | OPCODE(FPFma16, F16, F16, F16, F16, ) | 156 | OPCODE(FPFma16, F16, F16, F16, F16, ) |
| 156 | OPCODE(FPFma32, F32, F32, F32, F32, ) | 157 | OPCODE(FPFma32, F32, F32, F32, F32, ) |
| 157 | OPCODE(FPFma64, F64, F64, F64, F64, ) | 158 | OPCODE(FPFma64, F64, F64, F64, F64, ) |
| 158 | OPCODE(FPMax32, F32, F32, F32, ) | 159 | OPCODE(FPMax32, F32, F32, F32, ) |
| 159 | OPCODE(FPMax64, F64, F64, F64, ) | 160 | OPCODE(FPMax64, F64, F64, F64, ) |
| 160 | OPCODE(FPMin32, F32, F32, F32, ) | 161 | OPCODE(FPMin32, F32, F32, F32, ) |
| 161 | OPCODE(FPMin64, F64, F64, F64, ) | 162 | OPCODE(FPMin64, F64, F64, F64, ) |
| 162 | OPCODE(FPMul16, F16, F16, F16, ) | 163 | OPCODE(FPMul16, F16, F16, F16, ) |
| 163 | OPCODE(FPMul32, F32, F32, F32, ) | 164 | OPCODE(FPMul32, F32, F32, F32, ) |
| 164 | OPCODE(FPMul64, F64, F64, F64, ) | 165 | OPCODE(FPMul64, F64, F64, F64, ) |
| 165 | OPCODE(FPNeg16, F16, F16, ) | 166 | OPCODE(FPNeg16, F16, F16, ) |
| 166 | OPCODE(FPNeg32, F32, F32, ) | 167 | OPCODE(FPNeg32, F32, F32, ) |
| 167 | OPCODE(FPNeg64, F64, F64, ) | 168 | OPCODE(FPNeg64, F64, F64, ) |
| 168 | OPCODE(FPRecip32, F32, F32, ) | 169 | OPCODE(FPRecip32, F32, F32, ) |
| 169 | OPCODE(FPRecip64, F64, F64, ) | 170 | OPCODE(FPRecip64, F64, F64, ) |
| 170 | OPCODE(FPRecipSqrt32, F32, F32, ) | 171 | OPCODE(FPRecipSqrt32, F32, F32, ) |
| 171 | OPCODE(FPRecipSqrt64, F64, F64, ) | 172 | OPCODE(FPRecipSqrt64, F64, F64, ) |
| 172 | OPCODE(FPSqrt, F32, F32, ) | 173 | OPCODE(FPSqrt, F32, F32, ) |
| 173 | OPCODE(FPSin, F32, F32, ) | 174 | OPCODE(FPSin, F32, F32, ) |
| 174 | OPCODE(FPExp2, F32, F32, ) | 175 | OPCODE(FPExp2, F32, F32, ) |
| 175 | OPCODE(FPCos, F32, F32, ) | 176 | OPCODE(FPCos, F32, F32, ) |
| 176 | OPCODE(FPLog2, F32, F32, ) | 177 | OPCODE(FPLog2, F32, F32, ) |
| 177 | OPCODE(FPSaturate16, F16, F16, ) | 178 | OPCODE(FPSaturate16, F16, F16, ) |
| 178 | OPCODE(FPSaturate32, F32, F32, ) | 179 | OPCODE(FPSaturate32, F32, F32, ) |
| 179 | OPCODE(FPSaturate64, F64, F64, ) | 180 | OPCODE(FPSaturate64, F64, F64, ) |
| 180 | OPCODE(FPRoundEven16, F16, F16, ) | 181 | OPCODE(FPRoundEven16, F16, F16, ) |
| 181 | OPCODE(FPRoundEven32, F32, F32, ) | 182 | OPCODE(FPRoundEven32, F32, F32, ) |
| 182 | OPCODE(FPRoundEven64, F64, F64, ) | 183 | OPCODE(FPRoundEven64, F64, F64, ) |
| 183 | OPCODE(FPFloor16, F16, F16, ) | 184 | OPCODE(FPFloor16, F16, F16, ) |
| 184 | OPCODE(FPFloor32, F32, F32, ) | 185 | OPCODE(FPFloor32, F32, F32, ) |
| 185 | OPCODE(FPFloor64, F64, F64, ) | 186 | OPCODE(FPFloor64, F64, F64, ) |
| 186 | OPCODE(FPCeil16, F16, F16, ) | 187 | OPCODE(FPCeil16, F16, F16, ) |
| 187 | OPCODE(FPCeil32, F32, F32, ) | 188 | OPCODE(FPCeil32, F32, F32, ) |
| 188 | OPCODE(FPCeil64, F64, F64, ) | 189 | OPCODE(FPCeil64, F64, F64, ) |
| 189 | OPCODE(FPTrunc16, F16, F16, ) | 190 | OPCODE(FPTrunc16, F16, F16, ) |
| 190 | OPCODE(FPTrunc32, F32, F32, ) | 191 | OPCODE(FPTrunc32, F32, F32, ) |
| 191 | OPCODE(FPTrunc64, F64, F64, ) | 192 | OPCODE(FPTrunc64, F64, F64, ) |
| 192 | 193 | ||
| 193 | OPCODE(FPOrdEqual16, U1, F16, F16, ) | 194 | OPCODE(FPOrdEqual16, U1, F16, F16, ) |
| 194 | OPCODE(FPOrdEqual32, U1, F32, F32, ) | 195 | OPCODE(FPOrdEqual32, U1, F32, F32, ) |
| 195 | OPCODE(FPOrdEqual64, U1, F64, F64, ) | 196 | OPCODE(FPOrdEqual64, U1, F64, F64, ) |
| 196 | OPCODE(FPUnordEqual16, U1, F16, F16, ) | 197 | OPCODE(FPUnordEqual16, U1, F16, F16, ) |
| 197 | OPCODE(FPUnordEqual32, U1, F32, F32, ) | 198 | OPCODE(FPUnordEqual32, U1, F32, F32, ) |
| 198 | OPCODE(FPUnordEqual64, U1, F64, F64, ) | 199 | OPCODE(FPUnordEqual64, U1, F64, F64, ) |
| 199 | OPCODE(FPOrdNotEqual16, U1, F16, F16, ) | 200 | OPCODE(FPOrdNotEqual16, U1, F16, F16, ) |
| 200 | OPCODE(FPOrdNotEqual32, U1, F32, F32, ) | 201 | OPCODE(FPOrdNotEqual32, U1, F32, F32, ) |
| 201 | OPCODE(FPOrdNotEqual64, U1, F64, F64, ) | 202 | OPCODE(FPOrdNotEqual64, U1, F64, F64, ) |
| 202 | OPCODE(FPUnordNotEqual16, U1, F16, F16, ) | 203 | OPCODE(FPUnordNotEqual16, U1, F16, F16, ) |
| 203 | OPCODE(FPUnordNotEqual32, U1, F32, F32, ) | 204 | OPCODE(FPUnordNotEqual32, U1, F32, F32, ) |
| 204 | OPCODE(FPUnordNotEqual64, U1, F64, F64, ) | 205 | OPCODE(FPUnordNotEqual64, U1, F64, F64, ) |
| 205 | OPCODE(FPOrdLessThan16, U1, F16, F16, ) | 206 | OPCODE(FPOrdLessThan16, U1, F16, F16, ) |
| 206 | OPCODE(FPOrdLessThan32, U1, F32, F32, ) | 207 | OPCODE(FPOrdLessThan32, U1, F32, F32, ) |
| 207 | OPCODE(FPOrdLessThan64, U1, F64, F64, ) | 208 | OPCODE(FPOrdLessThan64, U1, F64, F64, ) |
| 208 | OPCODE(FPUnordLessThan16, U1, F16, F16, ) | 209 | OPCODE(FPUnordLessThan16, U1, F16, F16, ) |
| 209 | OPCODE(FPUnordLessThan32, U1, F32, F32, ) | 210 | OPCODE(FPUnordLessThan32, U1, F32, F32, ) |
| 210 | OPCODE(FPUnordLessThan64, U1, F64, F64, ) | 211 | OPCODE(FPUnordLessThan64, U1, F64, F64, ) |
| 211 | OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) | 212 | OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) |
| 212 | OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) | 213 | OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) |
| 213 | OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) | 214 | OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) |
| 214 | OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) | 215 | OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) |
| 215 | OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) | 216 | OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) |
| 216 | OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) | 217 | OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) |
| 217 | OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) | 218 | OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) |
| 218 | OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) | 219 | OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) |
| 219 | OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) | 220 | OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) |
| 220 | OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) | 221 | OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) |
| 221 | OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) | 222 | OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) |
| 222 | OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) | 223 | OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) |
| 223 | OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) | 224 | OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) |
| 224 | OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) | 225 | OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) |
| 225 | OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) | 226 | OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) |
| 226 | OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) | 227 | OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) |
| 227 | OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) | 228 | OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) |
| 228 | OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) | 229 | OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) |
| 229 | 230 | ||
| 230 | // Integer operations | 231 | // Integer operations |
| 231 | OPCODE(IAdd32, U32, U32, U32, ) | 232 | OPCODE(IAdd32, U32, U32, U32, ) |
| 232 | OPCODE(IAdd64, U64, U64, U64, ) | 233 | OPCODE(IAdd64, U64, U64, U64, ) |
| 233 | OPCODE(ISub32, U32, U32, U32, ) | 234 | OPCODE(ISub32, U32, U32, U32, ) |
| 234 | OPCODE(ISub64, U64, U64, U64, ) | 235 | OPCODE(ISub64, U64, U64, U64, ) |
| 235 | OPCODE(IMul32, U32, U32, U32, ) | 236 | OPCODE(IMul32, U32, U32, U32, ) |
| 236 | OPCODE(INeg32, U32, U32, ) | 237 | OPCODE(INeg32, U32, U32, ) |
| 237 | OPCODE(INeg64, U64, U64, ) | 238 | OPCODE(INeg64, U64, U64, ) |
| 238 | OPCODE(IAbs32, U32, U32, ) | 239 | OPCODE(IAbs32, U32, U32, ) |
| 239 | OPCODE(ShiftLeftLogical32, U32, U32, U32, ) | 240 | OPCODE(ShiftLeftLogical32, U32, U32, U32, ) |
| 240 | OPCODE(ShiftLeftLogical64, U64, U64, U32, ) | 241 | OPCODE(ShiftLeftLogical64, U64, U64, U32, ) |
| 241 | OPCODE(ShiftRightLogical32, U32, U32, U32, ) | 242 | OPCODE(ShiftRightLogical32, U32, U32, U32, ) |
| 242 | OPCODE(ShiftRightLogical64, U64, U64, U32, ) | 243 | OPCODE(ShiftRightLogical64, U64, U64, U32, ) |
| 243 | OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) | 244 | OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) |
| 244 | OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) | 245 | OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) |
| 245 | OPCODE(BitwiseAnd32, U32, U32, U32, ) | 246 | OPCODE(BitwiseAnd32, U32, U32, U32, ) |
| 246 | OPCODE(BitwiseOr32, U32, U32, U32, ) | 247 | OPCODE(BitwiseOr32, U32, U32, U32, ) |
| 247 | OPCODE(BitwiseXor32, U32, U32, U32, ) | 248 | OPCODE(BitwiseXor32, U32, U32, U32, ) |
| 248 | OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) | 249 | OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) |
| 249 | OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) | 250 | OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) |
| 250 | OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) | 251 | OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) |
| 251 | OPCODE(BitReverse32, U32, U32, ) | 252 | OPCODE(BitReverse32, U32, U32, ) |
| 252 | OPCODE(BitCount32, U32, U32, ) | 253 | OPCODE(BitCount32, U32, U32, ) |
| 253 | OPCODE(BitwiseNot32, U32, U32, ) | 254 | OPCODE(BitwiseNot32, U32, U32, ) |
| 254 | 255 | ||
| 255 | OPCODE(FindSMsb32, U32, U32, ) | 256 | OPCODE(FindSMsb32, U32, U32, ) |
| 256 | OPCODE(FindUMsb32, U32, U32, ) | 257 | OPCODE(FindUMsb32, U32, U32, ) |
| 257 | OPCODE(SMin32, U32, U32, U32, ) | 258 | OPCODE(SMin32, U32, U32, U32, ) |
| 258 | OPCODE(UMin32, U32, U32, U32, ) | 259 | OPCODE(UMin32, U32, U32, U32, ) |
| 259 | OPCODE(SMax32, U32, U32, U32, ) | 260 | OPCODE(SMax32, U32, U32, U32, ) |
| 260 | OPCODE(UMax32, U32, U32, U32, ) | 261 | OPCODE(UMax32, U32, U32, U32, ) |
| 261 | OPCODE(SLessThan, U1, U32, U32, ) | 262 | OPCODE(SLessThan, U1, U32, U32, ) |
| 262 | OPCODE(ULessThan, U1, U32, U32, ) | 263 | OPCODE(ULessThan, U1, U32, U32, ) |
| 263 | OPCODE(IEqual, U1, U32, U32, ) | 264 | OPCODE(IEqual, U1, U32, U32, ) |
| 264 | OPCODE(SLessThanEqual, U1, U32, U32, ) | 265 | OPCODE(SLessThanEqual, U1, U32, U32, ) |
| 265 | OPCODE(ULessThanEqual, U1, U32, U32, ) | 266 | OPCODE(ULessThanEqual, U1, U32, U32, ) |
| 266 | OPCODE(SGreaterThan, U1, U32, U32, ) | 267 | OPCODE(SGreaterThan, U1, U32, U32, ) |
| 267 | OPCODE(UGreaterThan, U1, U32, U32, ) | 268 | OPCODE(UGreaterThan, U1, U32, U32, ) |
| 268 | OPCODE(INotEqual, U1, U32, U32, ) | 269 | OPCODE(INotEqual, U1, U32, U32, ) |
| 269 | OPCODE(SGreaterThanEqual, U1, U32, U32, ) | 270 | OPCODE(SGreaterThanEqual, U1, U32, U32, ) |
| 270 | OPCODE(UGreaterThanEqual, U1, U32, U32, ) | 271 | OPCODE(UGreaterThanEqual, U1, U32, U32, ) |
| 271 | 272 | ||
| 272 | // Logical operations | 273 | // Logical operations |
| 273 | OPCODE(LogicalOr, U1, U1, U1, ) | 274 | OPCODE(LogicalOr, U1, U1, U1, ) |
| 274 | OPCODE(LogicalAnd, U1, U1, U1, ) | 275 | OPCODE(LogicalAnd, U1, U1, U1, ) |
| 275 | OPCODE(LogicalXor, U1, U1, U1, ) | 276 | OPCODE(LogicalXor, U1, U1, U1, ) |
| 276 | OPCODE(LogicalNot, U1, U1, ) | 277 | OPCODE(LogicalNot, U1, U1, ) |
| 277 | 278 | ||
| 278 | // Conversion operations | 279 | // Conversion operations |
| 279 | OPCODE(ConvertS16F16, U32, F16, ) | 280 | OPCODE(ConvertS16F16, U32, F16, ) |
| 280 | OPCODE(ConvertS16F32, U32, F32, ) | 281 | OPCODE(ConvertS16F32, U32, F32, ) |
| 281 | OPCODE(ConvertS16F64, U32, F64, ) | 282 | OPCODE(ConvertS16F64, U32, F64, ) |
| 282 | OPCODE(ConvertS32F16, U32, F16, ) | 283 | OPCODE(ConvertS32F16, U32, F16, ) |
| 283 | OPCODE(ConvertS32F32, U32, F32, ) | 284 | OPCODE(ConvertS32F32, U32, F32, ) |
| 284 | OPCODE(ConvertS32F64, U32, F64, ) | 285 | OPCODE(ConvertS32F64, U32, F64, ) |
| 285 | OPCODE(ConvertS64F16, U64, F16, ) | 286 | OPCODE(ConvertS64F16, U64, F16, ) |
| 286 | OPCODE(ConvertS64F32, U64, F32, ) | 287 | OPCODE(ConvertS64F32, U64, F32, ) |
| 287 | OPCODE(ConvertS64F64, U64, F64, ) | 288 | OPCODE(ConvertS64F64, U64, F64, ) |
| 288 | OPCODE(ConvertU16F16, U32, F16, ) | 289 | OPCODE(ConvertU16F16, U32, F16, ) |
| 289 | OPCODE(ConvertU16F32, U32, F32, ) | 290 | OPCODE(ConvertU16F32, U32, F32, ) |
| 290 | OPCODE(ConvertU16F64, U32, F64, ) | 291 | OPCODE(ConvertU16F64, U32, F64, ) |
| 291 | OPCODE(ConvertU32F16, U32, F16, ) | 292 | OPCODE(ConvertU32F16, U32, F16, ) |
| 292 | OPCODE(ConvertU32F32, U32, F32, ) | 293 | OPCODE(ConvertU32F32, U32, F32, ) |
| 293 | OPCODE(ConvertU32F64, U32, F64, ) | 294 | OPCODE(ConvertU32F64, U32, F64, ) |
| 294 | OPCODE(ConvertU64F16, U64, F16, ) | 295 | OPCODE(ConvertU64F16, U64, F16, ) |
| 295 | OPCODE(ConvertU64F32, U64, F32, ) | 296 | OPCODE(ConvertU64F32, U64, F32, ) |
| 296 | OPCODE(ConvertU64F64, U64, F64, ) | 297 | OPCODE(ConvertU64F64, U64, F64, ) |
| 297 | OPCODE(ConvertU64U32, U64, U32, ) | 298 | OPCODE(ConvertU64U32, U64, U32, ) |
| 298 | OPCODE(ConvertU32U64, U32, U64, ) | 299 | OPCODE(ConvertU32U64, U32, U64, ) |
| 299 | OPCODE(ConvertF16F32, F16, F32, ) | 300 | OPCODE(ConvertF16F32, F16, F32, ) |
| 300 | OPCODE(ConvertF32F16, F32, F16, ) | 301 | OPCODE(ConvertF32F16, F32, F16, ) |
| 301 | OPCODE(ConvertF32F64, F32, F64, ) | 302 | OPCODE(ConvertF32F64, F32, F64, ) |
| 302 | OPCODE(ConvertF64F32, F64, F32, ) | 303 | OPCODE(ConvertF64F32, F64, F32, ) |
| 304 | OPCODE(ConvertF16S32, F16, U32, ) | ||
| 305 | OPCODE(ConvertF16S64, F16, U64, ) | ||
| 306 | OPCODE(ConvertF16U32, F16, U32, ) | ||
| 307 | OPCODE(ConvertF16U64, F16, U64, ) | ||
| 308 | OPCODE(ConvertF32S32, F32, U32, ) | ||
| 309 | OPCODE(ConvertF32S64, F32, U64, ) | ||
| 310 | OPCODE(ConvertF32U32, F32, U32, ) | ||
| 311 | OPCODE(ConvertF32U64, F32, U64, ) | ||
| 312 | OPCODE(ConvertF64S32, F64, U32, ) | ||
| 313 | OPCODE(ConvertF64S64, F64, U64, ) | ||
| 314 | OPCODE(ConvertF64U32, F64, U32, ) | ||
| 315 | OPCODE(ConvertF64U64, F64, U64, ) | ||
| 316 | |||
| 317 | // Image operations | ||
| 318 | OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 319 | OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 320 | OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 321 | OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 322 | |||
| 323 | OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 324 | OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 325 | OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 326 | OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 327 | |||
| 328 | OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 329 | OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 330 | OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 331 | OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 771094eb9..8fea05f7b 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h | |||
| @@ -293,6 +293,17 @@ constexpr size_t NUM_REGS = 256; | |||
| 293 | return reg + (-num); | 293 | return reg + (-num); |
| 294 | } | 294 | } |
| 295 | 295 | ||
| 296 | [[nodiscard]] constexpr Reg operator++(Reg& reg) { | ||
| 297 | reg = reg + 1; | ||
| 298 | return reg; | ||
| 299 | } | ||
| 300 | |||
| 301 | [[nodiscard]] constexpr Reg operator++(Reg& reg, int) { | ||
| 302 | const Reg copy{reg}; | ||
| 303 | reg = reg + 1; | ||
| 304 | return copy; | ||
| 305 | } | ||
| 306 | |||
| 296 | [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { | 307 | [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { |
| 297 | return static_cast<size_t>(reg); | 308 | return static_cast<size_t>(reg); |
| 298 | } | 309 | } |
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 9b7e1480b..3602883d6 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h | |||
| @@ -75,6 +75,7 @@ private: | |||
| 75 | f64 imm_f64; | 75 | f64 imm_f64; |
| 76 | }; | 76 | }; |
| 77 | }; | 77 | }; |
| 78 | static_assert(std::is_trivially_copyable_v<Value>); | ||
| 78 | 79 | ||
| 79 | template <IR::Type type_> | 80 | template <IR::Type type_> |
| 80 | class TypedValue : public Value { | 81 | class TypedValue : public Value { |
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 5d0b91598..f2a2ff331 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc | |||
| @@ -249,8 +249,8 @@ INST(SULD, "SULD", "1110 1011 000- ----") | |||
| 249 | INST(SURED, "SURED", "1110 1011 010- ----") | 249 | INST(SURED, "SURED", "1110 1011 010- ----") |
| 250 | INST(SUST, "SUST", "1110 1011 001- ----") | 250 | INST(SUST, "SUST", "1110 1011 001- ----") |
| 251 | INST(SYNC, "SYNC", "1111 0000 1111 1---") | 251 | INST(SYNC, "SYNC", "1111 0000 1111 1---") |
| 252 | INST(TEX, "TEX", "1100 00-- --11 1---") | 252 | INST(TEX, "TEX", "1100 0--- ---- ----") |
| 253 | INST(TEX_b, "TEX (b)", "1101 1110 1011 1---") | 253 | INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") |
| 254 | INST(TEXS, "TEXS", "1101 -00- ---- ----") | 254 | INST(TEXS, "TEXS", "1101 -00- ---- ----") |
| 255 | INST(TLD, "TLD", "1101 1100 --11 1---") | 255 | INST(TLD, "TLD", "1101 1100 --11 1---") |
| 256 | INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") | 256 | INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") |
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index dbfc04f75..b270bbccd 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp | |||
| @@ -62,6 +62,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 62 | Optimization::SsaRewritePass(function.post_order_blocks); | 62 | Optimization::SsaRewritePass(function.post_order_blocks); |
| 63 | } | 63 | } |
| 64 | Optimization::GlobalMemoryToStorageBufferPass(program); | 64 | Optimization::GlobalMemoryToStorageBufferPass(program); |
| 65 | Optimization::TexturePass(env, program); | ||
| 65 | for (IR::Function& function : functions) { | 66 | for (IR::Function& function : functions) { |
| 66 | Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); | 67 | Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); |
| 67 | Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); | 68 | Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index fc6030e04..ff429c126 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -585,14 +585,6 @@ void TranslatorVisitor::SYNC(u64) { | |||
| 585 | ThrowNotImplemented(Opcode::SYNC); | 585 | ThrowNotImplemented(Opcode::SYNC); |
| 586 | } | 586 | } |
| 587 | 587 | ||
| 588 | void TranslatorVisitor::TEX(u64) { | ||
| 589 | ThrowNotImplemented(Opcode::TEX); | ||
| 590 | } | ||
| 591 | |||
| 592 | void TranslatorVisitor::TEX_b(u64) { | ||
| 593 | ThrowNotImplemented(Opcode::TEX_b); | ||
| 594 | } | ||
| 595 | |||
| 596 | void TranslatorVisitor::TEXS(u64) { | 588 | void TranslatorVisitor::TEXS(u64) { |
| 597 | ThrowNotImplemented(Opcode::TEXS); | 589 | ThrowNotImplemented(Opcode::TEXS); |
| 598 | } | 590 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp new file mode 100644 index 000000000..98d9f4c64 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp | |||
| @@ -0,0 +1,232 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
// LOD-mode field of the TEX/TEX.B encoding: selects how (or whether) the
// level-of-detail operand is supplied by the instruction.
enum class Blod : u64 {
    None,          // no LOD operand encoded (implicit-LOD sample)
    LZ,            // no operand; sample is forced to level zero (see MakeLod)
    LB,            // LOD bias read from a source register
    LL,            // explicit LOD level read from a source register
    INVALIDBLOD4,  // encodings 4 and 5 are rejected (see MakeLod)
    INVALIDBLOD5,
    LBA,           // NOTE(review): presumably LB/LL variants with an extra
    LLA,           //   operand ("A" suffix) - confirm against hardware docs
};

// Texture dimensionality/arrayness as encoded in instruction bits 28-30
// (see the BitField<28, 3, TextureType> in Impl below).
enum class TextureType : u64 {
    _1D,
    ARRAY_1D,
    _2D,
    ARRAY_2D,
    _3D,
    ARRAY_3D,
    CUBE,
    ARRAY_CUBE,
};
| 35 | |||
| 36 | Shader::TextureType GetType(TextureType type, bool dc) { | ||
| 37 | switch (type) { | ||
| 38 | case TextureType::_1D: | ||
| 39 | return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; | ||
| 40 | case TextureType::ARRAY_1D: | ||
| 41 | return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; | ||
| 42 | case TextureType::_2D: | ||
| 43 | return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; | ||
| 44 | case TextureType::ARRAY_2D: | ||
| 45 | return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; | ||
| 46 | case TextureType::_3D: | ||
| 47 | return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; | ||
| 48 | case TextureType::ARRAY_3D: | ||
| 49 | throw NotImplementedException("3D array texture type"); | ||
| 50 | case TextureType::CUBE: | ||
| 51 | return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; | ||
| 52 | case TextureType::ARRAY_CUBE: | ||
| 53 | return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; | ||
| 54 | } | ||
| 55 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 56 | } | ||
| 57 | |||
| 58 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 59 | const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; | ||
| 60 | switch (type) { | ||
| 61 | case TextureType::_1D: | ||
| 62 | return v.F(reg); | ||
| 63 | case TextureType::ARRAY_1D: | ||
| 64 | return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); | ||
| 65 | case TextureType::_2D: | ||
| 66 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 67 | case TextureType::ARRAY_2D: | ||
| 68 | return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); | ||
| 69 | case TextureType::_3D: | ||
| 70 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 71 | case TextureType::ARRAY_3D: | ||
| 72 | throw NotImplementedException("3D array texture type"); | ||
| 73 | case TextureType::CUBE: | ||
| 74 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 75 | case TextureType::ARRAY_CUBE: | ||
| 76 | return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); | ||
| 77 | } | ||
| 78 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 79 | } | ||
| 80 | |||
| 81 | IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { | ||
| 82 | switch (blod) { | ||
| 83 | case Blod::None: | ||
| 84 | return v.ir.Imm32(0.0f); | ||
| 85 | case Blod::LZ: | ||
| 86 | return v.ir.Imm32(0.0f); | ||
| 87 | case Blod::LB: | ||
| 88 | case Blod::LL: | ||
| 89 | case Blod::LBA: | ||
| 90 | case Blod::LLA: | ||
| 91 | return v.F(reg++); | ||
| 92 | case Blod::INVALIDBLOD4: | ||
| 93 | case Blod::INVALIDBLOD5: | ||
| 94 | break; | ||
| 95 | } | ||
| 96 | throw NotImplementedException("Invalid blod {}", blod); | ||
| 97 | } | ||
| 98 | |||
// Decodes the packed AOFFI texel-offset register into per-axis components:
// each axis occupies a 4-bit field of the source register (x: bits 0-3,
// y: bits 4-7, z: bits 8-11). Consumes one register and advances the cursor.
// Cube samples cannot carry an offset and invalid types throw.
// NOTE(review): hardware AOFFI offsets look like signed 4-bit values; this
// uses an unsigned extract, so a negative offset would decode as 8..15 -
// confirm whether a sign-extending extract is intended here.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
    const IR::U32 value{v.X(reg++)};
    switch (type) {
    case TextureType::_1D:
    case TextureType::ARRAY_1D:
        return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4));
    case TextureType::_2D:
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)),
                                       v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)));
    case TextureType::_3D:
    case TextureType::ARRAY_3D:
        return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)),
                                       v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)),
                                       v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4)));
    case TextureType::CUBE:
    case TextureType::ARRAY_CUBE:
        throw NotImplementedException("Illegal offset on CUBE sample");
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
| 120 | |||
| 121 | bool HasExplicitLod(Blod blod) { | ||
| 122 | switch (blod) { | ||
| 123 | case Blod::LL: | ||
| 124 | case Blod::LLA: | ||
| 125 | case Blod::LZ: | ||
| 126 | return true; | ||
| 127 | default: | ||
| 128 | return false; | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, | ||
| 133 | std::optional<u32> cbuf_offset) { | ||
| 134 | union { | ||
| 135 | u64 raw; | ||
| 136 | BitField<35, 1, u64> ndv; | ||
| 137 | BitField<49, 1, u64> nodep; | ||
| 138 | BitField<50, 1, u64> dc; | ||
| 139 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 140 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 141 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 142 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 143 | BitField<28, 3, TextureType> type; | ||
| 144 | BitField<31, 4, u64> mask; | ||
| 145 | } const tex{insn}; | ||
| 146 | |||
| 147 | if (lc) { | ||
| 148 | throw NotImplementedException("LC"); | ||
| 149 | } | ||
| 150 | const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; | ||
| 151 | |||
| 152 | IR::Reg meta_reg{tex.meta_reg}; | ||
| 153 | IR::Value handle; | ||
| 154 | IR::Value offset; | ||
| 155 | IR::F32 dref; | ||
| 156 | IR::F32 lod_clamp; | ||
| 157 | if (cbuf_offset) { | ||
| 158 | handle = v.ir.Imm32(*cbuf_offset); | ||
| 159 | } else { | ||
| 160 | handle = v.X(meta_reg++); | ||
| 161 | } | ||
| 162 | const IR::F32 lod{MakeLod(v, meta_reg, blod)}; | ||
| 163 | if (aoffi) { | ||
| 164 | offset = MakeOffset(v, meta_reg, tex.type); | ||
| 165 | } | ||
| 166 | if (tex.dc != 0) { | ||
| 167 | dref = v.F(meta_reg++); | ||
| 168 | } | ||
| 169 | IR::TextureInstInfo info{}; | ||
| 170 | info.type.Assign(GetType(tex.type, tex.dc != 0)); | ||
| 171 | info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); | ||
| 172 | info.has_lod_clamp.Assign(lc ? 1 : 0); | ||
| 173 | |||
| 174 | const IR::Value sample{[&]() -> IR::Value { | ||
| 175 | if (tex.dc == 0) { | ||
| 176 | if (HasExplicitLod(blod)) { | ||
| 177 | return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); | ||
| 178 | } else { | ||
| 179 | return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); | ||
| 180 | } | ||
| 181 | } | ||
| 182 | if (HasExplicitLod(blod)) { | ||
| 183 | return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, | ||
| 184 | info); | ||
| 185 | } else { | ||
| 186 | return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, | ||
| 187 | info); | ||
| 188 | } | ||
| 189 | }()}; | ||
| 190 | |||
| 191 | for (int element = 0; element < 4; ++element) { | ||
| 192 | if (((tex.mask >> element) & 1) == 0) { | ||
| 193 | continue; | ||
| 194 | } | ||
| 195 | IR::F32 value; | ||
| 196 | if (tex.dc != 0) { | ||
| 197 | value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); | ||
| 198 | } else { | ||
| 199 | value = IR::F32{v.ir.CompositeExtract(sample, element)}; | ||
| 200 | } | ||
| 201 | v.F(tex.dest_reg + element, value); | ||
| 202 | } | ||
| 203 | if (tex.sparse_pred != IR::Pred::PT) { | ||
| 204 | v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 205 | } | ||
| 206 | } | ||
| 207 | } // Anonymous namespace | ||
| 208 | |||
| 209 | void TranslatorVisitor::TEX(u64 insn) { | ||
| 210 | union { | ||
| 211 | u64 raw; | ||
| 212 | BitField<54, 1, u64> aoffi; | ||
| 213 | BitField<55, 3, Blod> blod; | ||
| 214 | BitField<58, 1, u64> lc; | ||
| 215 | BitField<36, 13, u64> cbuf_offset; | ||
| 216 | } const tex{insn}; | ||
| 217 | |||
| 218 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset)); | ||
| 219 | } | ||
| 220 | |||
| 221 | void TranslatorVisitor::TEX_b(u64 insn) { | ||
| 222 | union { | ||
| 223 | u64 raw; | ||
| 224 | BitField<36, 1, u64> aoffi; | ||
| 225 | BitField<37, 3, Blod> blod; | ||
| 226 | BitField<40, 1, u64> lc; | ||
| 227 | } const tex{insn}; | ||
| 228 | |||
| 229 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); | ||
| 230 | } | ||
| 231 | |||
| 232 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6662ef4cd..960beadd4 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -82,6 +82,25 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 82 | throw NotImplementedException("Constant buffer with non-immediate index"); | 82 | throw NotImplementedException("Constant buffer with non-immediate index"); |
| 83 | } | 83 | } |
| 84 | break; | 84 | break; |
| 85 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 86 | case IR::Opcode::BindlessImageSampleExplicitLod: | ||
| 87 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||
| 88 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||
| 89 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 90 | case IR::Opcode::BoundImageSampleExplicitLod: | ||
| 91 | case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||
| 92 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||
| 93 | case IR::Opcode::ImageSampleImplicitLod: | ||
| 94 | case IR::Opcode::ImageSampleExplicitLod: | ||
| 95 | case IR::Opcode::ImageSampleDrefImplicitLod: | ||
| 96 | case IR::Opcode::ImageSampleDrefExplicitLod: { | ||
| 97 | const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; | ||
| 98 | info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || | ||
| 99 | type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; | ||
| 100 | info.uses_sparse_residency |= | ||
| 101 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; | ||
| 102 | break; | ||
| 103 | } | ||
| 85 | default: | 104 | default: |
| 86 | break; | 105 | break; |
| 87 | } | 106 | } |
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 965e52135..2625c0bb2 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -226,6 +226,7 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value, | |||
| 226 | } | 226 | } |
| 227 | // Reversed loops are more likely to find the right result | 227 | // Reversed loops are more likely to find the right result |
| 228 | for (size_t arg = inst->NumArgs(); arg--;) { | 228 | for (size_t arg = inst->NumArgs(); arg--;) { |
| 229 | IR::Block* inst_block{block}; | ||
| 229 | if (inst->Opcode() == IR::Opcode::Phi) { | 230 | if (inst->Opcode() == IR::Opcode::Phi) { |
| 230 | // If we are going through a phi node, mark the current block as visited | 231 | // If we are going through a phi node, mark the current block as visited |
| 231 | visited.insert(block); | 232 | visited.insert(block); |
| @@ -235,15 +236,11 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value, | |||
| 235 | // Already visited, skip | 236 | // Already visited, skip |
| 236 | continue; | 237 | continue; |
| 237 | } | 238 | } |
| 238 | const std::optional storage_buffer{Track(phi_block, inst->Arg(arg), bias, visited)}; | 239 | inst_block = phi_block; |
| 239 | if (storage_buffer) { | 240 | } |
| 240 | return *storage_buffer; | 241 | const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)}; |
| 241 | } | 242 | if (storage_buffer) { |
| 242 | } else { | 243 | return *storage_buffer; |
| 243 | const std::optional storage_buffer{Track(block, inst->Arg(arg), bias, visited)}; | ||
| 244 | if (storage_buffer) { | ||
| 245 | return *storage_buffer; | ||
| 246 | } | ||
| 247 | } | 244 | } |
| 248 | } | 245 | } |
| 249 | return std::nullopt; | 246 | return std::nullopt; |
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 38106308c..3b7e7306b 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <span> | 7 | #include <span> |
| 8 | 8 | ||
| 9 | #include "shader_recompiler/environment.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/basic_block.h" | 10 | #include "shader_recompiler/frontend/ir/basic_block.h" |
| 10 | #include "shader_recompiler/frontend/ir/function.h" | 11 | #include "shader_recompiler/frontend/ir/function.h" |
| 11 | #include "shader_recompiler/frontend/ir/program.h" | 12 | #include "shader_recompiler/frontend/ir/program.h" |
| @@ -26,6 +27,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program); | |||
| 26 | void IdentityRemovalPass(IR::Function& function); | 27 | void IdentityRemovalPass(IR::Function& function); |
| 27 | void LowerFp16ToFp32(IR::Program& program); | 28 | void LowerFp16ToFp32(IR::Program& program); |
| 28 | void SsaRewritePass(std::span<IR::Block* const> post_order_blocks); | 29 | void SsaRewritePass(std::span<IR::Block* const> post_order_blocks); |
| 30 | void TexturePass(Environment& env, IR::Program& program); | ||
| 29 | void VerificationPass(const IR::Function& function); | 31 | void VerificationPass(const IR::Function& function); |
| 30 | 32 | ||
| 31 | } // namespace Shader::Optimization | 33 | } // namespace Shader::Optimization |
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp new file mode 100644 index 000000000..80e4ad6a9 --- /dev/null +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp | |||
| @@ -0,0 +1,199 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include <boost/container/flat_set.hpp> | ||
| 8 | #include <boost/container/small_vector.hpp> | ||
| 9 | |||
| 10 | #include "shader_recompiler/environment.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 13 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 14 | #include "shader_recompiler/shader_info.h" | ||
| 15 | |||
| 16 | namespace Shader::Optimization { | ||
| 17 | namespace { | ||
| 18 | struct ConstBufferAddr { | ||
| 19 | u32 index; | ||
| 20 | u32 offset; | ||
| 21 | }; | ||
| 22 | |||
| 23 | struct TextureInst { | ||
| 24 | ConstBufferAddr cbuf; | ||
| 25 | IR::Inst* inst; | ||
| 26 | IR::Block* block; | ||
| 27 | }; | ||
| 28 | |||
| 29 | using TextureInstVector = boost::container::small_vector<TextureInst, 24>; | ||
| 30 | |||
| 31 | using VisitedBlocks = boost::container::flat_set<IR::Block*, std::less<IR::Block*>, | ||
| 32 | boost::container::small_vector<IR::Block*, 2>>; | ||
| 33 | |||
| 34 | IR::Opcode IndexedInstruction(const IR::Inst& inst) { | ||
| 35 | switch (inst.Opcode()) { | ||
| 36 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 37 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 38 | return IR::Opcode::ImageSampleImplicitLod; | ||
| 39 | case IR::Opcode::BoundImageSampleExplicitLod: | ||
| 40 | case IR::Opcode::BindlessImageSampleExplicitLod: | ||
| 41 | return IR::Opcode::ImageSampleExplicitLod; | ||
| 42 | case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||
| 43 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||
| 44 | return IR::Opcode::ImageSampleDrefImplicitLod; | ||
| 45 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||
| 46 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||
| 47 | return IR::Opcode::ImageSampleDrefExplicitLod; | ||
| 48 | default: | ||
| 49 | return IR::Opcode::Void; | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | bool IsBindless(const IR::Inst& inst) { | ||
| 54 | switch (inst.Opcode()) { | ||
| 55 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 56 | case IR::Opcode::BindlessImageSampleExplicitLod: | ||
| 57 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||
| 58 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||
| 59 | return true; | ||
| 60 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 61 | case IR::Opcode::BoundImageSampleExplicitLod: | ||
| 62 | case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||
| 63 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||
| 64 | return false; | ||
| 65 | default: | ||
| 66 | throw InvalidArgument("Invalid opcode {}", inst.Opcode()); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | bool IsTextureInstruction(const IR::Inst& inst) { | ||
| 71 | return IndexedInstruction(inst) != IR::Opcode::Void; | ||
| 72 | } | ||
| 73 | |||
| 74 | std::optional<ConstBufferAddr> Track(IR::Block* block, const IR::Value& value, | ||
| 75 | VisitedBlocks& visited) { | ||
| 76 | if (value.IsImmediate()) { | ||
| 77 | // Immediates can't be a storage buffer | ||
| 78 | return std::nullopt; | ||
| 79 | } | ||
| 80 | const IR::Inst* const inst{value.InstRecursive()}; | ||
| 81 | if (inst->Opcode() == IR::Opcode::GetCbuf) { | ||
| 82 | const IR::Value index{inst->Arg(0)}; | ||
| 83 | const IR::Value offset{inst->Arg(1)}; | ||
| 84 | if (!index.IsImmediate()) { | ||
| 85 | // Reading a bindless texture from variable indices is valid | ||
| 86 | // but not supported here at the moment | ||
| 87 | return std::nullopt; | ||
| 88 | } | ||
| 89 | if (!offset.IsImmediate()) { | ||
| 90 | // TODO: Support arrays of textures | ||
| 91 | return std::nullopt; | ||
| 92 | } | ||
| 93 | return ConstBufferAddr{ | ||
| 94 | .index{index.U32()}, | ||
| 95 | .offset{offset.U32()}, | ||
| 96 | }; | ||
| 97 | } | ||
| 98 | // Reversed loops are more likely to find the right result | ||
| 99 | for (size_t arg = inst->NumArgs(); arg--;) { | ||
| 100 | IR::Block* inst_block{block}; | ||
| 101 | if (inst->Opcode() == IR::Opcode::Phi) { | ||
| 102 | // If we are going through a phi node, mark the current block as visited | ||
| 103 | visited.insert(block); | ||
| 104 | // and skip already visited blocks to avoid looping forever | ||
| 105 | IR::Block* const phi_block{inst->PhiBlock(arg)}; | ||
| 106 | if (visited.contains(phi_block)) { | ||
| 107 | // Already visited, skip | ||
| 108 | continue; | ||
| 109 | } | ||
| 110 | inst_block = phi_block; | ||
| 111 | } | ||
| 112 | const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), visited)}; | ||
| 113 | if (storage_buffer) { | ||
| 114 | return *storage_buffer; | ||
| 115 | } | ||
| 116 | } | ||
| 117 | return std::nullopt; | ||
| 118 | } | ||
| 119 | |||
| 120 | TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { | ||
| 121 | ConstBufferAddr addr; | ||
| 122 | if (IsBindless(inst)) { | ||
| 123 | VisitedBlocks visited; | ||
| 124 | const std::optional<ConstBufferAddr> track_addr{Track(block, IR::Value{&inst}, visited)}; | ||
| 125 | if (!track_addr) { | ||
| 126 | throw NotImplementedException("Failed to track bindless texture constant buffer"); | ||
| 127 | } | ||
| 128 | addr = *track_addr; | ||
| 129 | } else { | ||
| 130 | addr = ConstBufferAddr{ | ||
| 131 | .index{env.TextureBoundBuffer()}, | ||
| 132 | .offset{inst.Arg(0).U32()}, | ||
| 133 | }; | ||
| 134 | } | ||
| 135 | return TextureInst{ | ||
| 136 | .cbuf{addr}, | ||
| 137 | .inst{&inst}, | ||
| 138 | .block{block}, | ||
| 139 | }; | ||
| 140 | } | ||
| 141 | |||
| 142 | class Descriptors { | ||
| 143 | public: | ||
| 144 | explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {} | ||
| 145 | |||
| 146 | u32 Add(const TextureDescriptor& descriptor) { | ||
| 147 | // TODO: Handle arrays | ||
| 148 | auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) { | ||
| 149 | return descriptor.cbuf_index == existing.cbuf_index && | ||
| 150 | descriptor.cbuf_offset == existing.cbuf_offset && | ||
| 151 | descriptor.type == existing.type; | ||
| 152 | })}; | ||
| 153 | if (it != descriptors.end()) { | ||
| 154 | return static_cast<u32>(std::distance(descriptors.begin(), it)); | ||
| 155 | } | ||
| 156 | descriptors.push_back(descriptor); | ||
| 157 | return static_cast<u32>(descriptors.size()) - 1; | ||
| 158 | } | ||
| 159 | |||
| 160 | private: | ||
| 161 | TextureDescriptors& descriptors; | ||
| 162 | }; | ||
| 163 | } // Anonymous namespace | ||
| 164 | |||
| 165 | void TexturePass(Environment& env, IR::Program& program) { | ||
| 166 | TextureInstVector to_replace; | ||
| 167 | for (IR::Function& function : program.functions) { | ||
| 168 | for (IR::Block* const block : function.post_order_blocks) { | ||
| 169 | for (IR::Inst& inst : block->Instructions()) { | ||
| 170 | if (!IsTextureInstruction(inst)) { | ||
| 171 | continue; | ||
| 172 | } | ||
| 173 | to_replace.push_back(MakeInst(env, block, inst)); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | } | ||
| 177 | // Sort instructions to visit textures by constant buffer index, then by offset | ||
| 178 | std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) { | ||
| 179 | return lhs.cbuf.offset < rhs.cbuf.offset; | ||
| 180 | }); | ||
| 181 | std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { | ||
| 182 | return lhs.cbuf.index < rhs.cbuf.index; | ||
| 183 | }); | ||
| 184 | Descriptors descriptors{program.info.texture_descriptors}; | ||
| 185 | for (TextureInst& texture_inst : to_replace) { | ||
| 186 | // TODO: Handle arrays | ||
| 187 | IR::Inst* const inst{texture_inst.inst}; | ||
| 188 | const u32 index{descriptors.Add(TextureDescriptor{ | ||
| 189 | .type{inst->Flags<IR::TextureInstInfo>().type}, | ||
| 190 | .cbuf_index{texture_inst.cbuf.index}, | ||
| 191 | .cbuf_offset{texture_inst.cbuf.offset}, | ||
| 192 | .count{1}, | ||
| 193 | })}; | ||
| 194 | inst->ReplaceOpcode(IndexedInstruction(*inst)); | ||
| 195 | inst->SetArg(0, IR::Value{index}); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | |||
| 199 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 8766bf13e..103a2f0b4 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -8,25 +8,51 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | 10 | ||
| 11 | #include <boost/container/small_vector.hpp> | ||
| 11 | #include <boost/container/static_vector.hpp> | 12 | #include <boost/container/static_vector.hpp> |
| 12 | 13 | ||
| 13 | namespace Shader { | 14 | namespace Shader { |
| 14 | 15 | ||
| 16 | enum class TextureType : u32 { | ||
| 17 | Color1D, | ||
| 18 | ColorArray1D, | ||
| 19 | Color2D, | ||
| 20 | ColorArray2D, | ||
| 21 | Color3D, | ||
| 22 | ColorCube, | ||
| 23 | ColorArrayCube, | ||
| 24 | Shadow1D, | ||
| 25 | ShadowArray1D, | ||
| 26 | Shadow2D, | ||
| 27 | ShadowArray2D, | ||
| 28 | Shadow3D, | ||
| 29 | ShadowCube, | ||
| 30 | ShadowArrayCube, | ||
| 31 | }; | ||
| 32 | |||
| 33 | struct TextureDescriptor { | ||
| 34 | TextureType type; | ||
| 35 | u32 cbuf_index; | ||
| 36 | u32 cbuf_offset; | ||
| 37 | u32 count; | ||
| 38 | }; | ||
| 39 | using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; | ||
| 40 | |||
| 41 | struct ConstantBufferDescriptor { | ||
| 42 | u32 index; | ||
| 43 | u32 count; | ||
| 44 | }; | ||
| 45 | |||
| 46 | struct StorageBufferDescriptor { | ||
| 47 | u32 cbuf_index; | ||
| 48 | u32 cbuf_offset; | ||
| 49 | u32 count; | ||
| 50 | }; | ||
| 51 | |||
| 15 | struct Info { | 52 | struct Info { |
| 16 | static constexpr size_t MAX_CBUFS{18}; | 53 | static constexpr size_t MAX_CBUFS{18}; |
| 17 | static constexpr size_t MAX_SSBOS{16}; | 54 | static constexpr size_t MAX_SSBOS{16}; |
| 18 | 55 | ||
| 19 | struct ConstantBufferDescriptor { | ||
| 20 | u32 index; | ||
| 21 | u32 count; | ||
| 22 | }; | ||
| 23 | |||
| 24 | struct StorageBufferDescriptor { | ||
| 25 | u32 cbuf_index; | ||
| 26 | u32 cbuf_offset; | ||
| 27 | u32 count; | ||
| 28 | }; | ||
| 29 | |||
| 30 | bool uses_workgroup_id{}; | 56 | bool uses_workgroup_id{}; |
| 31 | bool uses_local_invocation_id{}; | 57 | bool uses_local_invocation_id{}; |
| 32 | bool uses_fp16{}; | 58 | bool uses_fp16{}; |
| @@ -35,12 +61,16 @@ struct Info { | |||
| 35 | bool uses_fp16_denorms_preserve{}; | 61 | bool uses_fp16_denorms_preserve{}; |
| 36 | bool uses_fp32_denorms_flush{}; | 62 | bool uses_fp32_denorms_flush{}; |
| 37 | bool uses_fp32_denorms_preserve{}; | 63 | bool uses_fp32_denorms_preserve{}; |
| 64 | bool uses_image_1d{}; | ||
| 65 | bool uses_sampled_1d{}; | ||
| 66 | bool uses_sparse_residency{}; | ||
| 38 | 67 | ||
| 39 | u32 constant_buffer_mask{}; | 68 | u32 constant_buffer_mask{}; |
| 40 | 69 | ||
| 41 | boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> | 70 | boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> |
| 42 | constant_buffer_descriptors; | 71 | constant_buffer_descriptors; |
| 43 | boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; | 72 | boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; |
| 73 | TextureDescriptors texture_descriptors; | ||
| 44 | }; | 74 | }; |
| 45 | 75 | ||
| 46 | } // namespace Shader | 76 | } // namespace Shader |