diff options
| author | 2021-05-02 01:50:27 +0200 | |
|---|---|---|
| committer | 2021-07-22 21:51:30 -0400 | |
| commit | ee61ec2c39e6db53c56e7ac761a2223d99f06908 (patch) | |
| tree | 2ae3e49e64b845de4db6756e6dfea5fd1694d674 /src | |
| parent | shader: Stub SR_AFFINITY (diff) | |
| download | yuzu-ee61ec2c39e6db53c56e7ac761a2223d99f06908.tar.gz yuzu-ee61ec2c39e6db53c56e7ac761a2223d99f06908.tar.xz yuzu-ee61ec2c39e6db53c56e7ac761a2223d99f06908.zip | |
shader: Optimize NVN Fallthrough
Diffstat (limited to 'src')
4 files changed, 83 insertions, 9 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 549df0d4b..be88b76f7 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
| @@ -840,6 +840,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { | |||
| 840 | AddLabel(); | 840 | AddLabel(); |
| 841 | const size_t num_buffers{info.storage_buffers_descriptors.size()}; | 841 | const size_t num_buffers{info.storage_buffers_descriptors.size()}; |
| 842 | for (size_t index = 0; index < num_buffers; ++index) { | 842 | for (size_t index = 0; index < num_buffers; ++index) { |
| 843 | if (!info.nvn_buffer_used[index]) { | ||
| 844 | continue; | ||
| 845 | } | ||
| 843 | const auto& ssbo{info.storage_buffers_descriptors[index]}; | 846 | const auto& ssbo{info.storage_buffers_descriptors[index]}; |
| 844 | const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; | 847 | const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; |
| 845 | const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; | 848 | const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; |
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index a4fa4319d..0d3f00699 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
| @@ -88,17 +88,20 @@ void AddNVNStorageBuffers(IR::Program& program) { | |||
| 88 | }()}; | 88 | }()}; |
| 89 | auto& descs{program.info.storage_buffers_descriptors}; | 89 | auto& descs{program.info.storage_buffers_descriptors}; |
| 90 | for (u32 index = 0; index < num_buffers; ++index) { | 90 | for (u32 index = 0; index < num_buffers; ++index) { |
| 91 | if (!program.info.nvn_buffer_used[index]) { | ||
| 92 | continue; | ||
| 93 | } | ||
| 91 | const u32 offset{base + index * descriptor_size}; | 94 | const u32 offset{base + index * descriptor_size}; |
| 92 | const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; | 95 | const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; |
| 93 | if (it != descs.end()) { | 96 | if (it != descs.end()) { |
| 97 | it->is_written |= program.info.stores_global_memory; | ||
| 94 | continue; | 98 | continue; |
| 95 | } | 99 | } |
| 96 | // Assume these are written for now | ||
| 97 | descs.push_back({ | 100 | descs.push_back({ |
| 98 | .cbuf_index = driver_cbuf, | 101 | .cbuf_index = driver_cbuf, |
| 99 | .cbuf_offset = offset, | 102 | .cbuf_offset = offset, |
| 100 | .count = 1, | 103 | .count = 1, |
| 101 | .is_written = true, | 104 | .is_written = program.info.stores_global_memory, |
| 102 | }); | 105 | }); |
| 103 | } | 106 | } |
| 104 | } | 107 | } |
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 7d8794a7e..13b793d57 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
| @@ -132,6 +132,30 @@ void SetPatch(Info& info, IR::Patch patch) { | |||
| 132 | } | 132 | } |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | void CheckCBufNVN(Info& info, IR::Inst& inst) { | ||
| 136 | const IR::Value cbuf_index{inst.Arg(0)}; | ||
| 137 | if (!cbuf_index.IsImmediate()) { | ||
| 138 | info.nvn_buffer_used.set(); | ||
| 139 | return; | ||
| 140 | } | ||
| 141 | const u32 index{cbuf_index.U32()}; | ||
| 142 | if (index != 0) { | ||
| 143 | return; | ||
| 144 | } | ||
| 145 | const IR::Value cbuf_offset{inst.Arg(1)}; | ||
| 146 | if (!cbuf_offset.IsImmediate()) { | ||
| 147 | info.nvn_buffer_used.set(); | ||
| 148 | return; | ||
| 149 | } | ||
| 150 | const u32 offset{cbuf_offset.U32()}; | ||
| 151 | const u32 descriptor_size{0x10}; | ||
| 152 | const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16}; | ||
| 153 | if (offset >= info.nvn_buffer_base && offset < upper_limit) { | ||
| 154 | const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size}; | ||
| 155 | info.nvn_buffer_used.set(nvn_index, true); | ||
| 156 | } | ||
| 157 | } | ||
| 158 | |||
| 135 | void VisitUsages(Info& info, IR::Inst& inst) { | 159 | void VisitUsages(Info& info, IR::Inst& inst) { |
| 136 | switch (inst.GetOpcode()) { | 160 | switch (inst.GetOpcode()) { |
| 137 | case IR::Opcode::CompositeConstructF16x2: | 161 | case IR::Opcode::CompositeConstructF16x2: |
| @@ -382,13 +406,6 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 382 | break; | 406 | break; |
| 383 | } | 407 | } |
| 384 | switch (inst.GetOpcode()) { | 408 | switch (inst.GetOpcode()) { |
| 385 | case IR::Opcode::LoadGlobalU8: | ||
| 386 | case IR::Opcode::LoadGlobalS8: | ||
| 387 | case IR::Opcode::LoadGlobalU16: | ||
| 388 | case IR::Opcode::LoadGlobalS16: | ||
| 389 | case IR::Opcode::LoadGlobal32: | ||
| 390 | case IR::Opcode::LoadGlobal64: | ||
| 391 | case IR::Opcode::LoadGlobal128: | ||
| 392 | case IR::Opcode::WriteGlobalU8: | 409 | case IR::Opcode::WriteGlobalU8: |
| 393 | case IR::Opcode::WriteGlobalS8: | 410 | case IR::Opcode::WriteGlobalS8: |
| 394 | case IR::Opcode::WriteGlobalU16: | 411 | case IR::Opcode::WriteGlobalU16: |
| @@ -423,6 +440,15 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 423 | case IR::Opcode::GlobalAtomicMinF32x2: | 440 | case IR::Opcode::GlobalAtomicMinF32x2: |
| 424 | case IR::Opcode::GlobalAtomicMaxF16x2: | 441 | case IR::Opcode::GlobalAtomicMaxF16x2: |
| 425 | case IR::Opcode::GlobalAtomicMaxF32x2: | 442 | case IR::Opcode::GlobalAtomicMaxF32x2: |
| 443 | info.stores_global_memory = true; | ||
| 444 | [[fallthrough]]; | ||
| 445 | case IR::Opcode::LoadGlobalU8: | ||
| 446 | case IR::Opcode::LoadGlobalS8: | ||
| 447 | case IR::Opcode::LoadGlobalU16: | ||
| 448 | case IR::Opcode::LoadGlobalS16: | ||
| 449 | case IR::Opcode::LoadGlobal32: | ||
| 450 | case IR::Opcode::LoadGlobal64: | ||
| 451 | case IR::Opcode::LoadGlobal128: | ||
| 426 | info.uses_int64 = true; | 452 | info.uses_int64 = true; |
| 427 | info.uses_global_memory = true; | 453 | info.uses_global_memory = true; |
| 428 | info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; | 454 | info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; |
| @@ -800,9 +826,27 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) { | |||
| 800 | } | 826 | } |
| 801 | } | 827 | } |
| 802 | 828 | ||
| 829 | void VisitCbufs(Info& info, IR::Inst& inst) { | ||
| 830 | switch (inst.GetOpcode()) { | ||
| 831 | case IR::Opcode::GetCbufU8: | ||
| 832 | case IR::Opcode::GetCbufS8: | ||
| 833 | case IR::Opcode::GetCbufU16: | ||
| 834 | case IR::Opcode::GetCbufS16: | ||
| 835 | case IR::Opcode::GetCbufU32: | ||
| 836 | case IR::Opcode::GetCbufF32: | ||
| 837 | case IR::Opcode::GetCbufU32x2: { | ||
| 838 | CheckCBufNVN(info, inst); | ||
| 839 | break; | ||
| 840 | } | ||
| 841 | default: | ||
| 842 | break; | ||
| 843 | } | ||
| 844 | } | ||
| 845 | |||
| 803 | void Visit(Info& info, IR::Inst& inst) { | 846 | void Visit(Info& info, IR::Inst& inst) { |
| 804 | VisitUsages(info, inst); | 847 | VisitUsages(info, inst); |
| 805 | VisitFpModifiers(info, inst); | 848 | VisitFpModifiers(info, inst); |
| 849 | VisitCbufs(info, inst); | ||
| 806 | } | 850 | } |
| 807 | 851 | ||
| 808 | void GatherInfoFromHeader(Environment& env, Info& info) { | 852 | void GatherInfoFromHeader(Environment& env, Info& info) { |
| @@ -839,6 +883,26 @@ void GatherInfoFromHeader(Environment& env, Info& info) { | |||
| 839 | 883 | ||
| 840 | void CollectShaderInfoPass(Environment& env, IR::Program& program) { | 884 | void CollectShaderInfoPass(Environment& env, IR::Program& program) { |
| 841 | Info& info{program.info}; | 885 | Info& info{program.info}; |
| 886 | const u32 base{[&] { | ||
| 887 | switch (program.stage) { | ||
| 888 | case Stage::VertexA: | ||
| 889 | case Stage::VertexB: | ||
| 890 | return 0x110u; | ||
| 891 | case Stage::TessellationControl: | ||
| 892 | return 0x210u; | ||
| 893 | case Stage::TessellationEval: | ||
| 894 | return 0x310u; | ||
| 895 | case Stage::Geometry: | ||
| 896 | return 0x410u; | ||
| 897 | case Stage::Fragment: | ||
| 898 | return 0x510u; | ||
| 899 | case Stage::Compute: | ||
| 900 | return 0x310u; | ||
| 901 | } | ||
| 902 | throw InvalidArgument("Invalid stage {}", program.stage); | ||
| 903 | }()}; | ||
| 904 | info.nvn_buffer_base = base; | ||
| 905 | |||
| 842 | for (IR::Block* const block : program.post_order_blocks) { | 906 | for (IR::Block* const block : program.post_order_blocks) { |
| 843 | for (IR::Inst& inst : block->Instructions()) { | 907 | for (IR::Inst& inst : block->Instructions()) { |
| 844 | Visit(info, inst); | 908 | Visit(info, inst); |
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 2f6adf714..a50a9a18c 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <bitset> | ||
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | #include "shader_recompiler/frontend/ir/type.h" | 11 | #include "shader_recompiler/frontend/ir/type.h" |
| @@ -140,6 +141,7 @@ struct Info { | |||
| 140 | bool stores_tess_level_outer{}; | 141 | bool stores_tess_level_outer{}; |
| 141 | bool stores_tess_level_inner{}; | 142 | bool stores_tess_level_inner{}; |
| 142 | bool stores_indexed_attributes{}; | 143 | bool stores_indexed_attributes{}; |
| 144 | bool stores_global_memory{}; | ||
| 143 | 145 | ||
| 144 | bool uses_fp16{}; | 146 | bool uses_fp16{}; |
| 145 | bool uses_fp64{}; | 147 | bool uses_fp64{}; |
| @@ -180,6 +182,8 @@ struct Info { | |||
| 180 | IR::Type used_storage_buffer_types{}; | 182 | IR::Type used_storage_buffer_types{}; |
| 181 | 183 | ||
| 182 | u32 constant_buffer_mask{}; | 184 | u32 constant_buffer_mask{}; |
| 185 | u32 nvn_buffer_base{}; | ||
| 186 | std::bitset<16> nvn_buffer_used{}; | ||
| 183 | 187 | ||
| 184 | boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> | 188 | boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> |
| 185 | constant_buffer_descriptors; | 189 | constant_buffer_descriptors; |