Diffstat (limited to 'src/shader_recompiler')
10 files changed, 171 insertions, 89 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index db9c94ce8..0cd87a48f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
     case IR::Attribute::PositionY:
     case IR::Attribute::PositionZ:
     case IR::Attribute::PositionW:
-        return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
-                                                  ctx.Const(element)));
+        return ctx.OpLoad(
+            ctx.F32[1],
+            ctx.need_input_position_indirect
+                ? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value,
+                              ctx.Const(element))
+                : AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element)));
     case IR::Attribute::InstanceId:
         if (ctx.profile.support_vertex_instance_id) {
            return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
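The indirection exists because some drivers reject a bare Position builtin as a shader input; the workaround (gated by has_broken_spirv_position_input further down) wraps the builtin in a Block struct, so every access chain needs a leading literal-zero index to step through the struct member first. A standalone model of the two index paths, with a hypothetical helper name that is not part of the emitter:

    #include <cstdint>
    #include <vector>

    // Index path to one component of the input position. With the struct
    // wrapper, index 0 selects the single struct member, then `element`
    // selects the x/y/z/w component; otherwise `element` is used directly.
    std::vector<std::uint32_t> PositionChainIndices(bool indirect, std::uint32_t element) {
        if (indirect) {
            return {0u, element};
        }
        return {element};
    }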
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 2c90f2368..c5db19d09 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
         ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
 }
 
-Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
-    const Id thirty_two{ctx.Const(32u)};
-    const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
-    const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
-    return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
+Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
+    const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
+    const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
+    return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
 }
 } // Anonymous namespace
 
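GetUpperClamp patched the clamp value before the shuffle. The replacement AddPartitionBase works on the result instead: a source lane computed within a 32-wide guest warp is rebased onto the host subgroup by adding the base lane of the 32-lane partition the current invocation occupies. A runnable model of the arithmetic (plain C++, not emitter code):

    #include <cassert>
    #include <cstdint>

    // Each 32-lane slice ("partition") of a wider host subgroup emulates one
    // 32-wide guest warp; src_lane is a lane index inside that slice.
    std::uint32_t AddPartitionBase(std::uint32_t host_thread_id, std::uint32_t src_lane) {
        const std::uint32_t partition_idx = host_thread_id >> 5;  // which slice
        const std::uint32_t partition_base = partition_idx << 5;  // first lane of slice
        return src_lane + partition_base;
    }

    int main() {
        assert(AddPartitionBase(3, 7) == 7);    // lower partition keeps lane 7
        assert(AddPartitionBase(40, 7) == 39);  // upper partition remaps 7 to 32 + 7
    }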
@@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
 Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                     Id segmentation_mask) {
     const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
-    const Id thread_id{GetThreadId(ctx)};
-    if (ctx.profile.warp_size_potentially_larger_than_guest) {
-        const Id thirty_two{ctx.Const(32u)};
-        const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
-        const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
-        const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
-        index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
-        clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
-    }
+    const Id thread_id{EmitLaneId(ctx)};
     const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
     const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
 
     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
-    const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
+    Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
 
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        src_thread_id = AddPartitionBase(ctx, src_thread_id);
+    }
+
     SetInBoundsFlag(inst, in_range);
     return SelectValue(ctx, in_range, value, src_thread_id);
 }
 
 Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                  Id segmentation_mask) {
-    const Id thread_id{GetThreadId(ctx)};
-    if (ctx.profile.warp_size_potentially_larger_than_guest) {
-        clamp = GetUpperClamp(ctx, thread_id, clamp);
-    }
+    const Id thread_id{EmitLaneId(ctx)};
     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
-    const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
+    Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
     const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
 
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        src_thread_id = AddPartitionBase(ctx, src_thread_id);
+    }
+
     SetInBoundsFlag(inst, in_range);
     return SelectValue(ctx, in_range, value, src_thread_id);
 }
 
 Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                    Id segmentation_mask) {
-    const Id thread_id{GetThreadId(ctx)};
-    if (ctx.profile.warp_size_potentially_larger_than_guest) {
-        clamp = GetUpperClamp(ctx, thread_id, clamp);
-    }
+    const Id thread_id{EmitLaneId(ctx)};
     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
-    const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
+    Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
 
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        src_thread_id = AddPartitionBase(ctx, src_thread_id);
+    }
+
     SetInBoundsFlag(inst, in_range);
     return SelectValue(ctx, in_range, value, src_thread_id);
 }
 
 Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                         Id segmentation_mask) {
-    const Id thread_id{GetThreadId(ctx)};
-    if (ctx.profile.warp_size_potentially_larger_than_guest) {
-        clamp = GetUpperClamp(ctx, thread_id, clamp);
-    }
+    const Id thread_id{EmitLaneId(ctx)};
     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
-    const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
+    Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
 
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        src_thread_id = AddPartitionBase(ctx, src_thread_id);
+    }
+
     SetInBoundsFlag(inst, in_range);
     return SelectValue(ctx, in_range, value, src_thread_id);
 }
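All four shuffle emitters now share one shape: the source lane and the in-bounds test are computed in guest-warp (32-lane) terms via EmitLaneId, and only afterwards is the lane rebased into the host subgroup. A scalar sketch of the shuffle-down case, simplified by assuming max_lane is already the clamped upper bound that GetMaxThreadId would produce:

    #include <cassert>
    #include <cstdint>

    struct ShuffleResult {
        std::uint32_t src_lane; // final host lane to read from
        bool in_range;          // computed before rebasing, in guest-warp terms
    };

    ShuffleResult ShuffleDown(std::uint32_t host_thread_id, std::uint32_t index,
                              std::uint32_t max_lane, bool wide_host_warp) {
        const std::uint32_t lane_id = host_thread_id & 31u; // EmitLaneId equivalent
        std::uint32_t src = lane_id + index;
        const bool in_range = src <= max_lane;
        if (wide_host_warp) {
            src += (host_thread_id >> 5) << 5; // AddPartitionBase
        }
        return {src, in_range};
    }

    int main() {
        // Lane 1 of the upper partition (host lane 33) shifted down by 2 reads
        // host lane 35, while the bounds test ran on lane 3 against 31.
        const ShuffleResult r = ShuffleDown(33, 2, 31, true);
        assert(r.src_lane == 35 && r.in_range);
    }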
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index ecb2db494..a0c155fdb 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
         U16 = Name(TypeInt(16, false), "u16");
         S16 = Name(TypeInt(16, true), "s16");
     }
-    if (info.uses_int64) {
+    if (info.uses_int64 && profile.support_int64) {
         AddCapability(spv::Capability::Int64);
         U64 = Name(TypeInt(64, false), "u64");
     }
@@ -721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
         size_t label_index{0};
         if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
             AddLabel(labels[label_index]);
-            const Id pointer{is_array
-                                 ? OpAccessChain(input_f32, input_position, vertex, masked_index)
-                                 : OpAccessChain(input_f32, input_position, masked_index)};
+            const Id pointer{[&]() {
+                if (need_input_position_indirect) {
+                    if (is_array)
+                        return OpAccessChain(input_f32, input_position, vertex, u32_zero_value,
+                                             masked_index);
+                    else
+                        return OpAccessChain(input_f32, input_position, u32_zero_value,
+                                             masked_index);
+                } else {
+                    if (is_array)
+                        return OpAccessChain(input_f32, input_position, vertex, masked_index);
+                    else
+                        return OpAccessChain(input_f32, input_position, masked_index);
+                }
+            }()};
             const Id result{OpLoad(F32[1], pointer)};
             OpReturnValue(result);
             ++label_index;
@@ -1367,12 +1379,25 @@ void EmitContext::DefineInputs(const IR::Program& program) {
         Decorate(layer, spv::Decoration::Flat);
     }
     if (loads.AnyComponent(IR::Attribute::PositionX)) {
-        const bool is_fragment{stage != Stage::Fragment};
-        const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
-        input_position = DefineInput(*this, F32[4], true, built_in);
-        if (profile.support_geometry_shader_passthrough) {
-            if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
-                Decorate(input_position, spv::Decoration::PassthroughNV);
+        const bool is_fragment{stage == Stage::Fragment};
+        if (!is_fragment && profile.has_broken_spirv_position_input) {
+            need_input_position_indirect = true;
+
+            const Id input_position_struct = TypeStruct(F32[4]);
+            input_position = DefineInput(*this, input_position_struct, true);
+
+            MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn,
+                           static_cast<unsigned>(spv::BuiltIn::Position));
+            Decorate(input_position_struct, spv::Decoration::Block);
+        } else {
+            const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord
+                                                    : spv::BuiltIn::Position};
+            input_position = DefineInput(*this, F32[4], true, built_in);
+
+            if (profile.support_geometry_shader_passthrough) {
+                if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
+                    Decorate(input_position, spv::Decoration::PassthroughNV);
+                }
             }
         }
     }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 4414a5169..dbc5c55b9 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -280,6 +280,7 @@ public:
     Id write_global_func_u32x2{};
     Id write_global_func_u32x4{};
 
+    bool need_input_position_indirect{};
     Id input_position{};
     std::array<Id, 32> input_generics{};
 
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index ac159d24b..a42453e90 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings(
     }
     return mapping;
 }
+
+void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program,
+                             const Shader::VaryingState& passthrough_mask,
+                             bool passthrough_position,
+                             std::optional<IR::Attribute> passthrough_layer_attr) {
+    for (u32 i = 0; i < program.output_vertices; i++) {
+        // Assign generics from input
+        for (u32 j = 0; j < 32; j++) {
+            if (!passthrough_mask.Generic(j)) {
+                continue;
+            }
+
+            const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
+            ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
+            ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
+            ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
+            ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
+        }
+
+        if (passthrough_position) {
+            // Assign position from input
+            const IR::Attribute attr = IR::Attribute::PositionX;
+            ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
+            ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
+            ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
+            ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
+        }
+
+        if (passthrough_layer_attr) {
+            // Assign layer
+            ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr),
+                            ir.Imm32(0));
+        }
+
+        // Emit vertex
+        ir.EmitVertex(ir.Imm32(0));
+    }
+    ir.EndPrimitive(ir.Imm32(0));
+}
+
+u32 GetOutputTopologyVertices(OutputTopology output_topology) {
+    switch (output_topology) {
+    case OutputTopology::PointList:
+        return 1;
+    case OutputTopology::LineStrip:
+        return 2;
+    default:
+        return 3;
+    }
+}
+
+void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) {
+    for (IR::Block* const block : program.blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            if (inst.GetOpcode() == IR::Opcode::Epilogue) {
+                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
+                EmitGeometryPassthrough(
+                    ir, program, program.info.passthrough,
+                    program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {});
+            }
+        }
+    }
+}
+
 } // Anonymous namespace
 
 IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
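LowerGeometryPassthrough converts a passthrough geometry shader into an ordinary one for hosts without passthrough support: before each Epilogue it replays every input vertex, copying the passthrough attributes (and position, when consumed) with SetAttribute, then closes the primitive. GetOutputTopologyVertices supplies the vertex count the lowered shader must declare; a compile-time model of that mapping:

    #include <cstdint>

    enum class OutputTopology { PointList, LineStrip, TriangleStrip };

    // One vertex per point, two per line-strip segment, three per triangle.
    constexpr std::uint32_t GetOutputTopologyVertices(OutputTopology topology) {
        switch (topology) {
        case OutputTopology::PointList:
            return 1;
        case OutputTopology::LineStrip:
            return 2;
        default: // TriangleStrip
            return 3;
        }
    }

    static_assert(GetOutputTopologyVertices(OutputTopology::LineStrip) == 2);
    static_assert(GetOutputTopologyVertices(OutputTopology::TriangleStrip) == 3);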
@@ -195,9 +259,14 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
         program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
         if (program.is_geometry_passthrough) {
             const auto& mask{env.GpPassthroughMask()};
-            for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
+            for (size_t i = 0; i < mask.size() * 32; ++i) {
                 program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
             }
+
+            if (!host_info.support_geometry_shader_passthrough) {
+                program.output_vertices = GetOutputTopologyVertices(program.output_topology);
+                LowerGeometryPassthrough(program, host_info);
+            }
         }
         break;
     }
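The new loop bound fixes an off-by-32x: program.info.passthrough.mask stores one flag per attribute, while env.GpPassthroughMask() packs 32 attributes per u32 word, so the old bound (the packed array's size) visited only the first few bits. A runnable model; the 8-word/256-bit sizes are illustrative assumptions:

    #include <array>
    #include <bitset>
    #include <cassert>
    #include <cstdint>

    // A set bit in the packed hardware mask means "do NOT pass this attribute
    // through", hence the == 0 when expanding to one flag per attribute.
    std::bitset<256> ExpandPassthroughMask(const std::array<std::uint32_t, 8>& mask) {
        std::bitset<256> passthrough;
        for (std::size_t i = 0; i < mask.size() * 32; ++i) {
            passthrough[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
        }
        return passthrough;
    }

    int main() {
        std::array<std::uint32_t, 8> mask{};
        mask[1] = 1u << 3; // mark attribute 35 (word 1, bit 3) as not passed through
        const auto passthrough = ExpandPassthroughMask(mask);
        assert(!passthrough[35] && passthrough[34]);
    }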
@@ -223,7 +292,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
 
     Optimization::PositionPass(env, program);
 
-    Optimization::GlobalMemoryToStorageBufferPass(program);
+    Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
     Optimization::TexturePass(env, program, host_info);
 
     if (Settings::values.resolution_info.active) {
@@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
     IR::Program program;
     program.stage = Stage::Geometry;
     program.output_topology = output_topology;
-    switch (output_topology) {
-    case OutputTopology::PointList:
-        program.output_vertices = 1;
-        break;
-    case OutputTopology::LineStrip:
-        program.output_vertices = 2;
-        break;
-    default:
-        program.output_vertices = 3;
-        break;
-    }
+    program.output_vertices = GetOutputTopologyVertices(output_topology);
 
     program.is_geometry_passthrough = false;
     program.info.loads.mask = source_program.info.stores.mask;
@@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
     node.data.block = current_block;
 
     IR::IREmitter ir{*current_block};
-    for (u32 i = 0; i < program.output_vertices; i++) {
-        // Assign generics from input
-        for (u32 j = 0; j < 32; j++) {
-            if (!program.info.stores.Generic(j)) {
-                continue;
-            }
-
-            const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
-            ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
-            ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
-            ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
-            ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
-        }
-
-        // Assign position from input
-        const IR::Attribute attr = IR::Attribute::PositionX;
-        ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
-        ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
-        ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
-        ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
-
-        // Assign layer
-        ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer),
-                        ir.Imm32(0));
-
-        // Emit vertex
-        ir.EmitVertex(ir.Imm32(0));
-    }
-    ir.EndPrimitive(ir.Imm32(0));
+    EmitGeometryPassthrough(ir, program, program.info.stores, true,
+                            source_program.info.emulated_layer);
 
     IR::Block* return_block{block_pool.Create(inst_pool)};
     IR::IREmitter{*return_block}.Epilogue();
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index d5d279554..55fc48768 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -15,6 +15,9 @@ struct HostTranslateInfo {
     bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
     bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
     bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
+    u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
+    bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
+                                                ///< passthrough shaders
 };
 
 } // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 336338e62..9101722ba 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
 #include "shader_recompiler/frontend/ir/breadth_first_search.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
@@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
 }
 
 /// Returns the offset in indices (not bytes) for an equivalent storage instruction
-IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     IR::U32 offset;
     if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
     }
     // Subtract the least significant 32 bits from the guest offset. The result is the storage
     // buffer offset in bytes.
-    const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+    IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+
+    // Align the offset base to match the host alignment requirements
+    low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
     return ir.ISub(offset, low_cbuf);
 }
 
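Aligning the constant-buffer base downward lets the byte offset absorb any remainder below the host's minimum SSBO alignment, so the rebased buffer start is an address the host can actually bind. A runnable model of the new offset math (alignment is assumed to be a power of two, as host buffer alignments are):

    #include <cassert>
    #include <cstdint>

    // guest_addr: byte address of the access; base_cbuf: buffer base read from
    // the constant buffer; alignment: host minimum SSBO offset alignment.
    std::uint32_t StorageOffsetBytes(std::uint32_t guest_addr, std::uint32_t base_cbuf,
                                     std::uint32_t alignment) {
        const std::uint32_t aligned_base = base_cbuf & ~(alignment - 1u);
        return guest_addr - aligned_base;
    }

    int main() {
        // With 256-byte minimum alignment, a buffer based at 0x1010 is rebased
        // to 0x1000 and the 0x10 remainder moves into the per-access offset.
        assert(StorageOffsetBytes(0x1014, 0x1010, 256) == 0x14);
    }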
@@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
     }
 }
 } // Anonymous namespace
 
-void GlobalMemoryToStorageBufferPass(IR::Program& program) {
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
     StorageInfo info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
@@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
         const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
         IR::Block* const block{storage_inst.block};
         IR::Inst* const inst{storage_inst.inst};
-        const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+        const IR::U32 offset{
+            StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
         Replace(*block, *inst, index, offset);
     }
 }
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 1f8f2ba95..4ffad1172 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -15,7 +15,7 @@ namespace Shader::Optimization {
 void CollectShaderInfoPass(Environment& env, IR::Program& program);
 void ConstantPropagationPass(Environment& env, IR::Program& program);
 void DeadCodeEliminationPass(IR::Program& program);
-void GlobalMemoryToStorageBufferPass(IR::Program& program);
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
 void IdentityRemovalPass(IR::Program& program);
 void LowerFp16ToFp32(IR::Program& program);
 void LowerInt64ToInt32(IR::Program& program);
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index b8841a536..253e0d0bd 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -55,6 +55,8 @@ struct Profile {
 
     /// OpFClamp is broken and OpFMax + OpFMin should be used instead
     bool has_broken_spirv_clamp{};
+    /// The Position builtin needs to be wrapped in a struct when used as an input
+    bool has_broken_spirv_position_input{};
     /// Offset image operands with an unsigned type do not work
     bool has_broken_unsigned_image_offsets{};
     /// Signed instructions with unsigned data types are misinterpreted
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 44236b6b1..f93181e1e 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -65,6 +65,8 @@ enum class Interpolation {
 struct ConstantBufferDescriptor {
     u32 index;
     u32 count;
+
+    auto operator<=>(const ConstantBufferDescriptor&) const = default;
 };
 
 struct StorageBufferDescriptor {
@@ -72,6 +74,8 @@ struct StorageBufferDescriptor {
     u32 cbuf_offset;
     u32 count;
     bool is_written;
+
+    auto operator<=>(const StorageBufferDescriptor&) const = default;
 };
 
 struct TextureBufferDescriptor {
@@ -84,6 +88,8 @@ struct TextureBufferDescriptor {
     u32 secondary_shift_left;
     u32 count;
     u32 size_shift;
+
+    auto operator<=>(const TextureBufferDescriptor&) const = default;
 };
 using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
 
@@ -95,6 +101,8 @@ struct ImageBufferDescriptor {
     u32 cbuf_offset;
     u32 count;
     u32 size_shift;
+
+    auto operator<=>(const ImageBufferDescriptor&) const = default;
 };
 using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
 
@@ -110,6 +118,8 @@ struct TextureDescriptor {
     u32 secondary_shift_left;
     u32 count;
     u32 size_shift;
+
+    auto operator<=>(const TextureDescriptor&) const = default;
 };
 using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
 
@@ -122,6 +132,8 @@ struct ImageDescriptor {
     u32 cbuf_offset;
     u32 count;
     u32 size_shift;
+
+    auto operator<=>(const ImageDescriptor&) const = default;
 };
 using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
 