summary | refs | log | tree | commit | diff
path: root/src/shader_recompiler
diff options
context:
space:
mode:
author: FernandoS27, 2021-05-02 01:50:27 +0200
committer: ameerj, 2021-07-22 21:51:30 -0400
commit: ee61ec2c39e6db53c56e7ac761a2223d99f06908 (patch)
tree: 2ae3e49e64b845de4db6756e6dfea5fd1694d674 /src/shader_recompiler
parent: shader: Stub SR_AFFINITY (diff)
download: yuzu-ee61ec2c39e6db53c56e7ac761a2223d99f06908.tar.gz
yuzu-ee61ec2c39e6db53c56e7ac761a2223d99f06908.tar.xz
yuzu-ee61ec2c39e6db53c56e7ac761a2223d99f06908.zip
shader: Optimize NVN Fallthrough
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_context.cpp 3
-rw-r--r-- src/shader_recompiler/frontend/maxwell/program.cpp 7
-rw-r--r-- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp 78
-rw-r--r-- src/shader_recompiler/shader_info.h 4
4 files changed, 83 insertions, 9 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 549df0d4b..be88b76f7 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -840,6 +840,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
840 AddLabel(); 840 AddLabel();
841 const size_t num_buffers{info.storage_buffers_descriptors.size()}; 841 const size_t num_buffers{info.storage_buffers_descriptors.size()};
842 for (size_t index = 0; index < num_buffers; ++index) { 842 for (size_t index = 0; index < num_buffers; ++index) {
843 if (!info.nvn_buffer_used[index]) {
844 continue;
845 }
843 const auto& ssbo{info.storage_buffers_descriptors[index]}; 846 const auto& ssbo{info.storage_buffers_descriptors[index]};
844 const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; 847 const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
845 const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; 848 const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index a4fa4319d..0d3f00699 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -88,17 +88,20 @@ void AddNVNStorageBuffers(IR::Program& program) {
88 }()}; 88 }()};
89 auto& descs{program.info.storage_buffers_descriptors}; 89 auto& descs{program.info.storage_buffers_descriptors};
90 for (u32 index = 0; index < num_buffers; ++index) { 90 for (u32 index = 0; index < num_buffers; ++index) {
91 if (!program.info.nvn_buffer_used[index]) {
92 continue;
93 }
91 const u32 offset{base + index * descriptor_size}; 94 const u32 offset{base + index * descriptor_size};
92 const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; 95 const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
93 if (it != descs.end()) { 96 if (it != descs.end()) {
97 it->is_written |= program.info.stores_global_memory;
94 continue; 98 continue;
95 } 99 }
96 // Assume these are written for now
97 descs.push_back({ 100 descs.push_back({
98 .cbuf_index = driver_cbuf, 101 .cbuf_index = driver_cbuf,
99 .cbuf_offset = offset, 102 .cbuf_offset = offset,
100 .count = 1, 103 .count = 1,
101 .is_written = true, 104 .is_written = program.info.stores_global_memory,
102 }); 105 });
103 } 106 }
104} 107}
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 7d8794a7e..13b793d57 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -132,6 +132,30 @@ void SetPatch(Info& info, IR::Patch patch) {
132 } 132 }
133} 133}
134 134
135void CheckCBufNVN(Info& info, IR::Inst& inst) {
136 const IR::Value cbuf_index{inst.Arg(0)};
137 if (!cbuf_index.IsImmediate()) {
138 info.nvn_buffer_used.set();
139 return;
140 }
141 const u32 index{cbuf_index.U32()};
142 if (index != 0) {
143 return;
144 }
145 const IR::Value cbuf_offset{inst.Arg(1)};
146 if (!cbuf_offset.IsImmediate()) {
147 info.nvn_buffer_used.set();
148 return;
149 }
150 const u32 offset{cbuf_offset.U32()};
151 const u32 descriptor_size{0x10};
152 const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16};
153 if (offset >= info.nvn_buffer_base && offset < upper_limit) {
154 const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size};
155 info.nvn_buffer_used.set(nvn_index, true);
156 }
157}
158
135void VisitUsages(Info& info, IR::Inst& inst) { 159void VisitUsages(Info& info, IR::Inst& inst) {
136 switch (inst.GetOpcode()) { 160 switch (inst.GetOpcode()) {
137 case IR::Opcode::CompositeConstructF16x2: 161 case IR::Opcode::CompositeConstructF16x2:
@@ -382,13 +406,6 @@ void VisitUsages(Info& info, IR::Inst& inst) {
382 break; 406 break;
383 } 407 }
384 switch (inst.GetOpcode()) { 408 switch (inst.GetOpcode()) {
385 case IR::Opcode::LoadGlobalU8:
386 case IR::Opcode::LoadGlobalS8:
387 case IR::Opcode::LoadGlobalU16:
388 case IR::Opcode::LoadGlobalS16:
389 case IR::Opcode::LoadGlobal32:
390 case IR::Opcode::LoadGlobal64:
391 case IR::Opcode::LoadGlobal128:
392 case IR::Opcode::WriteGlobalU8: 409 case IR::Opcode::WriteGlobalU8:
393 case IR::Opcode::WriteGlobalS8: 410 case IR::Opcode::WriteGlobalS8:
394 case IR::Opcode::WriteGlobalU16: 411 case IR::Opcode::WriteGlobalU16:
@@ -423,6 +440,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
423 case IR::Opcode::GlobalAtomicMinF32x2: 440 case IR::Opcode::GlobalAtomicMinF32x2:
424 case IR::Opcode::GlobalAtomicMaxF16x2: 441 case IR::Opcode::GlobalAtomicMaxF16x2:
425 case IR::Opcode::GlobalAtomicMaxF32x2: 442 case IR::Opcode::GlobalAtomicMaxF32x2:
443 info.stores_global_memory = true;
444 [[fallthrough]];
445 case IR::Opcode::LoadGlobalU8:
446 case IR::Opcode::LoadGlobalS8:
447 case IR::Opcode::LoadGlobalU16:
448 case IR::Opcode::LoadGlobalS16:
449 case IR::Opcode::LoadGlobal32:
450 case IR::Opcode::LoadGlobal64:
451 case IR::Opcode::LoadGlobal128:
426 info.uses_int64 = true; 452 info.uses_int64 = true;
427 info.uses_global_memory = true; 453 info.uses_global_memory = true;
428 info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; 454 info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
@@ -800,9 +826,27 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
800 } 826 }
801} 827}
802 828
829void VisitCbufs(Info& info, IR::Inst& inst) {
830 switch (inst.GetOpcode()) {
831 case IR::Opcode::GetCbufU8:
832 case IR::Opcode::GetCbufS8:
833 case IR::Opcode::GetCbufU16:
834 case IR::Opcode::GetCbufS16:
835 case IR::Opcode::GetCbufU32:
836 case IR::Opcode::GetCbufF32:
837 case IR::Opcode::GetCbufU32x2: {
838 CheckCBufNVN(info, inst);
839 break;
840 }
841 default:
842 break;
843 }
844}
845
803void Visit(Info& info, IR::Inst& inst) { 846void Visit(Info& info, IR::Inst& inst) {
804 VisitUsages(info, inst); 847 VisitUsages(info, inst);
805 VisitFpModifiers(info, inst); 848 VisitFpModifiers(info, inst);
849 VisitCbufs(info, inst);
806} 850}
807 851
808void GatherInfoFromHeader(Environment& env, Info& info) { 852void GatherInfoFromHeader(Environment& env, Info& info) {
@@ -839,6 +883,26 @@ void GatherInfoFromHeader(Environment& env, Info& info) {
839 883
840void CollectShaderInfoPass(Environment& env, IR::Program& program) { 884void CollectShaderInfoPass(Environment& env, IR::Program& program) {
841 Info& info{program.info}; 885 Info& info{program.info};
886 const u32 base{[&] {
887 switch (program.stage) {
888 case Stage::VertexA:
889 case Stage::VertexB:
890 return 0x110u;
891 case Stage::TessellationControl:
892 return 0x210u;
893 case Stage::TessellationEval:
894 return 0x310u;
895 case Stage::Geometry:
896 return 0x410u;
897 case Stage::Fragment:
898 return 0x510u;
899 case Stage::Compute:
900 return 0x310u;
901 }
902 throw InvalidArgument("Invalid stage {}", program.stage);
903 }()};
904 info.nvn_buffer_base = base;
905
842 for (IR::Block* const block : program.post_order_blocks) { 906 for (IR::Block* const block : program.post_order_blocks) {
843 for (IR::Inst& inst : block->Instructions()) { 907 for (IR::Inst& inst : block->Instructions()) {
844 Visit(info, inst); 908 Visit(info, inst);
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 2f6adf714..a50a9a18c 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <bitset>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "shader_recompiler/frontend/ir/type.h" 11#include "shader_recompiler/frontend/ir/type.h"
@@ -140,6 +141,7 @@ struct Info {
140 bool stores_tess_level_outer{}; 141 bool stores_tess_level_outer{};
141 bool stores_tess_level_inner{}; 142 bool stores_tess_level_inner{};
142 bool stores_indexed_attributes{}; 143 bool stores_indexed_attributes{};
144 bool stores_global_memory{};
143 145
144 bool uses_fp16{}; 146 bool uses_fp16{};
145 bool uses_fp64{}; 147 bool uses_fp64{};
@@ -180,6 +182,8 @@ struct Info {
180 IR::Type used_storage_buffer_types{}; 182 IR::Type used_storage_buffer_types{};
181 183
182 u32 constant_buffer_mask{}; 184 u32 constant_buffer_mask{};
185 u32 nvn_buffer_base{};
186 std::bitset<16> nvn_buffer_used{};
183 187
184 boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> 188 boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
185 constant_buffer_descriptors; 189 constant_buffer_descriptors;