diff options
Diffstat (limited to 'src')
5 files changed, 49 insertions, 53 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 1eba9cc00..03d2ec73e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -72,20 +72,19 @@ public: | |||
| 72 | explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, | 72 | explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, |
| 73 | Id offset, Id lod_clamp) { | 73 | Id offset, Id lod_clamp) { |
| 74 | if (Sirit::ValidId(derivates)) { | 74 | if (Sirit::ValidId(derivates)) { |
| 75 | boost::container::static_vector<Id, 3> deriv_x_accum; | ||
| 76 | boost::container::static_vector<Id, 3> deriv_y_accum; | ||
| 77 | for (size_t i = 0; i < num_derivates; i++) { | ||
| 78 | deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); | ||
| 79 | deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); | ||
| 80 | } | ||
| 81 | Id derivates_X = ctx.OpCompositeConstruct( | ||
| 82 | ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()}); | ||
| 83 | Id derivates_Y = ctx.OpCompositeConstruct( | ||
| 84 | ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()}); | ||
| 85 | Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); | ||
| 86 | } else { | ||
| 87 | throw LogicError("Derivates must be present"); | 75 | throw LogicError("Derivates must be present"); |
| 88 | } | 76 | } |
| 77 | boost::container::static_vector<Id, 3> deriv_x_accum; | ||
| 78 | boost::container::static_vector<Id, 3> deriv_y_accum; | ||
| 79 | for (size_t i = 0; i < num_derivates; i++) { | ||
| 80 | deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); | ||
| 81 | deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); | ||
| 82 | } | ||
| 83 | const Id derivates_X{ctx.OpCompositeConstruct( | ||
| 84 | ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; | ||
| 85 | const Id derivates_Y{ctx.OpCompositeConstruct( | ||
| 86 | ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; | ||
| 87 | Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); | ||
| 89 | if (Sirit::ValidId(offset)) { | 88 | if (Sirit::ValidId(offset)) { |
| 90 | Add(spv::ImageOperandsMask::Offset, offset); | 89 | Add(spv::ImageOperandsMask::Offset, offset); |
| 91 | } | 90 | } |
| @@ -100,10 +99,10 @@ public: | |||
| 100 | operands.push_back(value); | 99 | operands.push_back(value); |
| 101 | } | 100 | } |
| 102 | 101 | ||
| 103 | void Add(spv::ImageOperandsMask new_mask, Id value, Id value_2) { | 102 | void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) { |
| 104 | mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | | 103 | mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | |
| 105 | static_cast<unsigned>(new_mask)); | 104 | static_cast<unsigned>(new_mask)); |
| 106 | operands.push_back(value); | 105 | operands.push_back(value_1); |
| 107 | operands.push_back(value_2); | 106 | operands.push_back(value_2); |
| 108 | } | 107 | } |
| 109 | 108 | ||
| @@ -345,7 +344,8 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst*, const IR::Value& index, Id coo | |||
| 345 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | 344 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, |
| 346 | Id derivates, Id offset, Id lod_clamp) { | 345 | Id derivates, Id offset, Id lod_clamp) { |
| 347 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 346 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| 348 | const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset, lod_clamp); | 347 | const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, |
| 348 | offset, lod_clamp); | ||
| 349 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, | 349 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, |
| 350 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), | 350 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), |
| 351 | coords, operands.Mask(), operands.Span()); | 351 | coords, operands.Mask(), operands.Span()); |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp index 00768e167..c66468a48 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp | |||
| @@ -47,7 +47,7 @@ Shader::TextureType GetType(TextureType type, bool dc) { | |||
| 47 | 47 | ||
| 48 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { | 48 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { |
| 49 | const IR::U32 value{v.X(reg)}; | 49 | const IR::U32 value{v.X(reg)}; |
| 50 | const u32 base = has_lod_clamp ? 12 : 16; | 50 | const u32 base{has_lod_clamp ? 12U : 16U}; |
| 51 | return v.ir.CompositeConstruct( | 51 | return v.ir.CompositeConstruct( |
| 52 | v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), | 52 | v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), |
| 53 | v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); | 53 | v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); |
| @@ -74,20 +74,21 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | |||
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | IR::Value coords; | 76 | IR::Value coords; |
| 77 | u32 num_derivates; | 77 | u32 num_derivates{}; |
| 78 | IR::Reg base_reg = txd.coord_reg; | 78 | IR::Reg base_reg{txd.coord_reg}; |
| 79 | IR::Reg last_reg; | 79 | IR::Reg last_reg; |
| 80 | IR::Value handle; | 80 | IR::Value handle; |
| 81 | if (!is_bindless) { | 81 | if (is_bindless) { |
| 82 | handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); | ||
| 83 | } else { | ||
| 84 | handle = v.X(base_reg++); | 82 | handle = v.X(base_reg++); |
| 83 | } else { | ||
| 84 | handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); | ||
| 85 | } | 85 | } |
| 86 | 86 | ||
| 87 | const auto read_array{[&]() -> IR::F32 { | 87 | const auto read_array{[&]() -> IR::F32 { |
| 88 | return v.ir.ConvertUToF(32, 16, | 88 | const IR::U32 base{v.ir.Imm32(0)}; |
| 89 | v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(0), | 89 | const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)}; |
| 90 | v.ir.Imm32(has_lod_clamp ? 12 : 16))); | 90 | const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; |
| 91 | return v.ir.ConvertUToF(32, 16, array_index); | ||
| 91 | }}; | 92 | }}; |
| 92 | switch (txd.type) { | 93 | switch (txd.type) { |
| 93 | case TextureType::_1D: { | 94 | case TextureType::_1D: { |
| @@ -141,19 +142,20 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | |||
| 141 | 142 | ||
| 142 | IR::F32 lod_clamp; | 143 | IR::F32 lod_clamp; |
| 143 | if (has_lod_clamp) { | 144 | if (has_lod_clamp) { |
| 144 | const IR::F32 conv4_8fixp_f = v.ir.Imm32(Common::BitCast<f32>(0x3b800000U)); | 145 | // Lod Clamp is a Fixed Point 4.8, we need to transform it to float. |
| 145 | const IR::F32 tmp = v.ir.ConvertUToF( | 146 | // to convert a fixed point, float(value) / float(1 << fixed_point) |
| 146 | 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12))); | 147 | // in this case the fixed_point is 8. |
| 147 | lod_clamp = v.ir.FPMul(tmp, conv4_8fixp_f); | 148 | const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))}; |
| 149 | const IR::F32 fixp_lc{v.ir.ConvertUToF( | ||
| 150 | 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; | ||
| 151 | lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); | ||
| 148 | } | 152 | } |
| 149 | 153 | ||
| 150 | IR::TextureInstInfo info{}; | 154 | IR::TextureInstInfo info{}; |
| 151 | info.type.Assign(GetType(txd.type, false)); | 155 | info.type.Assign(GetType(txd.type, false)); |
| 152 | info.num_derivates.Assign(num_derivates); | 156 | info.num_derivates.Assign(num_derivates); |
| 153 | info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); | 157 | info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); |
| 154 | const IR::Value sample{[&]() -> IR::Value { | 158 | const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; |
| 155 | return v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info); | ||
| 156 | }()}; | ||
| 157 | 159 | ||
| 158 | IR::Reg dest_reg{txd.dest_reg}; | 160 | IR::Reg dest_reg{txd.dest_reg}; |
| 159 | for (size_t element = 0; element < 4; ++element) { | 161 | for (size_t element = 0; element < 4; ++element) { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index df38f87a3..987b7ec34 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp | |||
| @@ -117,10 +117,10 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | |||
| 117 | IR::Value offset; | 117 | IR::Value offset; |
| 118 | IR::U32 lod; | 118 | IR::U32 lod; |
| 119 | IR::U32 multisample; | 119 | IR::U32 multisample; |
| 120 | if (!is_bindless) { | 120 | if (is_bindless) { |
| 121 | handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); | ||
| 122 | } else { | ||
| 123 | handle = v.X(meta_reg++); | 121 | handle = v.X(meta_reg++); |
| 122 | } else { | ||
| 123 | handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); | ||
| 124 | } | 124 | } |
| 125 | if (tld.lod != 0) { | 125 | if (tld.lod != 0) { |
| 126 | lod = v.X(meta_reg++); | 126 | lod = v.X(meta_reg++); |
| @@ -138,9 +138,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | |||
| 138 | } | 138 | } |
| 139 | IR::TextureInstInfo info{}; | 139 | IR::TextureInstInfo info{}; |
| 140 | info.type.Assign(GetType(tld.type, false)); | 140 | info.type.Assign(GetType(tld.type, false)); |
| 141 | const IR::Value sample{[&]() -> IR::Value { | 141 | const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; |
| 142 | return v.ir.ImageFetch(handle, coords, offset, lod, multisample, info); | ||
| 143 | }()}; | ||
| 144 | 142 | ||
| 145 | IR::Reg dest_reg{tld.dest_reg}; | 143 | IR::Reg dest_reg{tld.dest_reg}; |
| 146 | for (size_t element = 0; element < 4; ++element) { | 144 | for (size_t element = 0; element < 4; ++element) { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index ee13ede30..b6efc04f0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp | |||
| @@ -81,39 +81,35 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | |||
| 81 | BitField<36, 13, u64> cbuf_offset; | 81 | BitField<36, 13, u64> cbuf_offset; |
| 82 | } const tmml{insn}; | 82 | } const tmml{insn}; |
| 83 | 83 | ||
| 84 | if ((tmml.mask & 0xC) != 0) { | 84 | if ((tmml.mask & 0b1100) != 0) { |
| 85 | throw NotImplementedException("TMML BA results are not implmented"); | 85 | throw NotImplementedException("TMML BA results are not implmented"); |
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | IR::F32 transform_constant = v.ir.Imm32(256.0f); | 88 | IR::F32 transform_constant{v.ir.Imm32(256.0f)}; |
| 89 | 89 | ||
| 90 | const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; | 90 | const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; |
| 91 | 91 | ||
| 92 | IR::U32 handle; | 92 | IR::U32 handle; |
| 93 | IR::Reg meta_reg{tmml.meta_reg}; | 93 | IR::Reg meta_reg{tmml.meta_reg}; |
| 94 | if (!is_bindless) { | 94 | if (is_bindless) { |
| 95 | handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); | ||
| 96 | } else { | ||
| 97 | handle = v.X(meta_reg++); | 95 | handle = v.X(meta_reg++); |
| 96 | } else { | ||
| 97 | handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); | ||
| 98 | } | 98 | } |
| 99 | IR::TextureInstInfo info{}; | 99 | IR::TextureInstInfo info{}; |
| 100 | info.type.Assign(GetType(tmml.type, false)); | 100 | info.type.Assign(GetType(tmml.type, false)); |
| 101 | const IR::Value sample{ | 101 | const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; |
| 102 | [&]() -> IR::Value { return v.ir.ImageQueryLod(handle, coords, info); }()}; | ||
| 103 | 102 | ||
| 104 | const IR::FpControl fp_control{ | ||
| 105 | .no_contraction{false}, | ||
| 106 | .rounding{IR::FpRounding::RP}, | ||
| 107 | .fmz_mode{IR::FmzMode::FTZ}, | ||
| 108 | }; | ||
| 109 | IR::Reg dest_reg{tmml.dest_reg}; | 103 | IR::Reg dest_reg{tmml.dest_reg}; |
| 110 | for (size_t element = 0; element < 4; ++element) { | 104 | for (size_t element = 0; element < 4; ++element) { |
| 111 | if (((tmml.mask >> element) & 1) == 0) { | 105 | if (((tmml.mask >> element) & 1) == 0) { |
| 112 | continue; | 106 | continue; |
| 113 | } | 107 | } |
| 114 | IR::F32 value = IR::F32{v.ir.CompositeExtract(sample, element)}; | 108 | IR::F32 value{v.ir.CompositeExtract(sample, element)}; |
| 115 | v.F(dest_reg, | 109 | if (element < 2) { |
| 116 | element < 2 ? IR::F32{v.ir.FPMul(value, transform_constant, fp_control)} : value); | 110 | value = v.ir.FPMul(value, transform_constant); |
| 111 | } | ||
| 112 | v.F(dest_reg, value); | ||
| 117 | ++dest_reg; | 113 | ++dest_reg; |
| 118 | } | 114 | } |
| 119 | } | 115 | } |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 05e27c687..882eff880 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -64,7 +64,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | |||
| 64 | } | 64 | } |
| 65 | const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); | 65 | const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); |
| 66 | if (it != map_ranges.end()) { | 66 | if (it != map_ranges.end()) { |
| 67 | // ASSERT(it->first == gpu_addr); | 67 | ASSERT(it->first == gpu_addr); |
| 68 | map_ranges.erase(it); | 68 | map_ranges.erase(it); |
| 69 | } else { | 69 | } else { |
| 70 | UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); | 70 | UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); |