diff options
| author | 2021-04-17 11:56:45 +0200 | |
|---|---|---|
| committer | 2021-07-22 21:51:28 -0400 | |
| commit | 04c459fc8d99b41fa8a03c49523599e9bf797f9d (patch) | |
| tree | 199934b3ef85a5affbe9ef115e9b0a1085851b36 /src | |
| parent | shader: Implement SR_Y_DIRECTION (diff) | |
| download | yuzu-04c459fc8d99b41fa8a03c49523599e9bf797f9d.tar.gz yuzu-04c459fc8d99b41fa8a03c49523599e9bf797f9d.tar.xz yuzu-04c459fc8d99b41fa8a03c49523599e9bf797f9d.zip | |
shader: Implement fine derivatives constant propagation
Diffstat (limited to 'src')
9 files changed, 101 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 9ec970706..c4d5874ca 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp | |||
| @@ -341,6 +341,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct | |||
| 341 | if (!ctx.profile.xfb_varyings.empty()) { | 341 | if (!ctx.profile.xfb_varyings.empty()) { |
| 342 | ctx.AddCapability(spv::Capability::TransformFeedback); | 342 | ctx.AddCapability(spv::Capability::TransformFeedback); |
| 343 | } | 343 | } |
| 344 | if (info.uses_derivates) { | ||
| 345 | ctx.AddCapability(spv::Capability::DerivativeControl); | ||
| 346 | } | ||
| 344 | // TODO: Track this usage | 347 | // TODO: Track this usage |
| 345 | ctx.AddCapability(spv::Capability::ImageGatherExtended); | 348 | ctx.AddCapability(spv::Capability::ImageGatherExtended); |
| 346 | ctx.AddCapability(spv::Capability::ImageQuery); | 349 | ctx.AddCapability(spv::Capability::ImageQuery); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 7949d08d0..dec4f434a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -529,4 +529,8 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id | |||
| 529 | Id segmentation_mask); | 529 | Id segmentation_mask); |
| 530 | Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); | 530 | Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); |
| 531 | 531 | ||
| 532 | Id EmitDPdxFine(EmitContext& ctx, Id op_a); | ||
| 533 | |||
| 534 | Id EmitDPdyFine(EmitContext& ctx, Id op_a); | ||
| 535 | |||
| 532 | } // namespace Shader::Backend::SPIRV | 536 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 1c23ccc08..d53412204 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | |||
| @@ -183,4 +183,12 @@ Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { | |||
| 183 | return ctx.OpFAdd(ctx.F32[1], result_a, result_b); | 183 | return ctx.OpFAdd(ctx.F32[1], result_a, result_b); |
| 184 | } | 184 | } |
| 185 | 185 | ||
| 186 | Id EmitDPdxFine(EmitContext& ctx, Id op_a) { | ||
| 187 | return ctx.OpDPdxFine(ctx.F32[1], op_a); | ||
| 188 | } | ||
| 189 | |||
| 190 | Id EmitDPdyFine(EmitContext& ctx, Id op_a) { | ||
| 191 | return ctx.OpDPdyFine(ctx.F32[1], op_a); | ||
| 192 | } | ||
| 193 | |||
| 186 | } // namespace Shader::Backend::SPIRV | 194 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index c3e8d0681..845a57b1e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -1925,4 +1925,12 @@ F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpCon | |||
| 1925 | return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); | 1925 | return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); |
| 1926 | } | 1926 | } |
| 1927 | 1927 | ||
| 1928 | F32 IREmitter::DPdxFine(const F32& a) { | ||
| 1929 | return Inst<F32>(Opcode::DPdxFine, a); | ||
| 1930 | } | ||
| 1931 | |||
| 1932 | F32 IREmitter::DPdyFine(const F32& a) { | ||
| 1933 | return Inst<F32>(Opcode::DPdyFine, a); | ||
| 1934 | } | ||
| 1935 | |||
| 1928 | } // namespace Shader::IR | 1936 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7e67f5e30..c7101d668 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -353,6 +353,10 @@ public: | |||
| 353 | [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, | 353 | [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, |
| 354 | FpControl control = {}); | 354 | FpControl control = {}); |
| 355 | 355 | ||
| 356 | [[nodiscard]] F32 DPdxFine(const F32& a); | ||
| 357 | |||
| 358 | [[nodiscard]] F32 DPdyFine(const F32& a); | ||
| 359 | |||
| 356 | private: | 360 | private: |
| 357 | IR::Block::iterator insertion_point; | 361 | IR::Block::iterator insertion_point; |
| 358 | 362 | ||
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 269de8ca5..e4cb8964a 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -511,3 +511,5 @@ OPCODE(ShuffleUp, U32, U32, | |||
| 511 | OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) | 511 | OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) |
| 512 | OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) | 512 | OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) |
| 513 | OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) | 513 | OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) |
| 514 | OPCODE(DPdxFine, F32, F32, ) | ||
| 515 | OPCODE(DPdyFine, F32, F32, ) | ||
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 60b7d3a36..e5688667b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -530,6 +530,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 530 | case IR::Opcode::FSwizzleAdd: | 530 | case IR::Opcode::FSwizzleAdd: |
| 531 | info.uses_fswzadd = true; | 531 | info.uses_fswzadd = true; |
| 532 | break; | 532 | break; |
| 533 | case IR::Opcode::DPdxFine: | ||
| 534 | case IR::Opcode::DPdyFine: | ||
| 535 | info.uses_derivates = true; | ||
| 536 | break; | ||
| 533 | case IR::Opcode::LoadStorageU8: | 537 | case IR::Opcode::LoadStorageU8: |
| 534 | case IR::Opcode::LoadStorageS8: | 538 | case IR::Opcode::LoadStorageS8: |
| 535 | case IR::Opcode::WriteStorageU8: | 539 | case IR::Opcode::WriteStorageU8: |
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index ee73b5b60..983fb20ab 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -412,6 +412,71 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser | |||
| 412 | inst.ReplaceUsesWith(*result); | 412 | inst.ReplaceUsesWith(*result); |
| 413 | } | 413 | } |
| 414 | 414 | ||
| 415 | IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) { | ||
| 416 | if (value.IsImmediate()) { | ||
| 417 | return value; | ||
| 418 | } | ||
| 419 | IR::Inst* const inst{value.InstRecursive()}; | ||
| 420 | if (inst->GetOpcode() == expected_cast) { | ||
| 421 | return inst->Arg(0).Resolve(); | ||
| 422 | } | ||
| 423 | return value; | ||
| 424 | } | ||
| 425 | |||
| 426 | void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | ||
| 427 | const IR::Value swizzle{inst.Arg(2)}; | ||
| 428 | if (!swizzle.IsImmediate()) { | ||
| 429 | return; | ||
| 430 | } | ||
| 431 | |||
| 432 | const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)}; | ||
| 433 | const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)}; | ||
| 434 | |||
| 435 | if (value_1.IsImmediate()) { | ||
| 436 | return; | ||
| 437 | } | ||
| 438 | |||
| 439 | const u32 swizzle_value{swizzle.U32()}; | ||
| 440 | if (swizzle_value != 0x99 && swizzle_value != 0xA5) { | ||
| 441 | return; | ||
| 442 | } | ||
| 443 | |||
| 444 | IR::Inst* const inst2{value_1.InstRecursive()}; | ||
| 445 | if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) { | ||
| 446 | return; | ||
| 447 | } | ||
| 448 | const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; | ||
| 449 | if (value_2 != value_3) { | ||
| 450 | return; | ||
| 451 | } | ||
| 452 | |||
| 453 | const IR::Value index{inst2->Arg(1)}; | ||
| 454 | const IR::Value clamp{inst2->Arg(2)}; | ||
| 455 | const IR::Value segmentation_mask{inst2->Arg(3)}; | ||
| 456 | |||
| 457 | if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { | ||
| 458 | return; | ||
| 459 | } | ||
| 460 | |||
| 461 | if (clamp.U32() != 3 || segmentation_mask.U32() != 28) { | ||
| 462 | return; | ||
| 463 | } | ||
| 464 | |||
| 465 | if (swizzle_value == 0x99) { | ||
| 466 | // DPdxFine | ||
| 467 | if (index.U32() == 1) { | ||
| 468 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 469 | inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{value_2})); | ||
| 470 | } | ||
| 471 | } else if (swizzle_value == 0xA5) { | ||
| 472 | // DPdyFine | ||
| 473 | if (index.U32() == 2) { | ||
| 474 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 475 | inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{value_2})); | ||
| 476 | } | ||
| 477 | } | ||
| 478 | } | ||
| 479 | |||
| 415 | void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | 480 | void ConstantPropagation(IR::Block& block, IR::Inst& inst) { |
| 416 | switch (inst.GetOpcode()) { | 481 | switch (inst.GetOpcode()) { |
| 417 | case IR::Opcode::GetRegister: | 482 | case IR::Opcode::GetRegister: |
| @@ -532,6 +597,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | |||
| 532 | case IR::Opcode::CompositeExtractF16x4: | 597 | case IR::Opcode::CompositeExtractF16x4: |
| 533 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, | 598 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, |
| 534 | IR::Opcode::CompositeInsertF16x4); | 599 | IR::Opcode::CompositeInsertF16x4); |
| 600 | case IR::Opcode::FSwizzleAdd: | ||
| 601 | return FoldFSwizzleAdd(block, inst); | ||
| 535 | default: | 602 | default: |
| 536 | break; | 603 | break; |
| 537 | } | 604 | } |
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 0a8931930..3f22958e8 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -147,6 +147,7 @@ struct Info { | |||
| 147 | bool uses_subgroup_vote{}; | 147 | bool uses_subgroup_vote{}; |
| 148 | bool uses_subgroup_mask{}; | 148 | bool uses_subgroup_mask{}; |
| 149 | bool uses_fswzadd{}; | 149 | bool uses_fswzadd{}; |
| 150 | bool uses_derivates{}; | ||
| 150 | bool uses_typeless_image_reads{}; | 151 | bool uses_typeless_image_reads{}; |
| 151 | bool uses_typeless_image_writes{}; | 152 | bool uses_typeless_image_writes{}; |
| 152 | bool uses_shared_increment{}; | 153 | bool uses_shared_increment{}; |