summaryrefslogtreecommitdiff
path: root/src/shader_recompiler/frontend/maxwell/translate_program.cpp
diff options
context:
space:
mode:
authorGravatar bunnei2021-07-25 11:39:04 -0700
committerGravatar GitHub2021-07-25 11:39:04 -0700
commit98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch)
tree816faa96c2c4d291825063433331a8ea4b3d08f1 /src/shader_recompiler/frontend/maxwell/translate_program.cpp
parentMerge pull request #6699 from lat9nq/common-threads (diff)
parentshader: Support out of bound local memory reads and immediate writes (diff)
downloadyuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.gz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.xz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.zip
Merge pull request #6585 from ameerj/hades
Shader Decompiler Rewrite
Diffstat (limited to 'src/shader_recompiler/frontend/maxwell/translate_program.cpp')
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp223
1 files changed, 223 insertions, 0 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
new file mode 100644
index 000000000..c067d459c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -0,0 +1,223 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <vector>
8
9#include "common/settings.h"
10#include "shader_recompiler/exception.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/frontend/ir/post_order.h"
13#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
14#include "shader_recompiler/frontend/maxwell/translate/translate.h"
15#include "shader_recompiler/frontend/maxwell/translate_program.h"
16#include "shader_recompiler/host_translate_info.h"
17#include "shader_recompiler/ir_opt/passes.h"
18
19namespace Shader::Maxwell {
20namespace {
21IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
22 size_t num_syntax_blocks{};
23 for (const auto& node : syntax_list) {
24 if (node.type == IR::AbstractSyntaxNode::Type::Block) {
25 ++num_syntax_blocks;
26 }
27 }
28 IR::BlockList blocks;
29 blocks.reserve(num_syntax_blocks);
30 for (const auto& node : syntax_list) {
31 if (node.type == IR::AbstractSyntaxNode::Type::Block) {
32 blocks.push_back(node.data.block);
33 }
34 }
35 return blocks;
36}
37
38void RemoveUnreachableBlocks(IR::Program& program) {
39 // Some blocks might be unreachable if a function call exists unconditionally
40 // If this happens the number of blocks and post order blocks will mismatch
41 if (program.blocks.size() == program.post_order_blocks.size()) {
42 return;
43 }
44 const auto begin{program.blocks.begin() + 1};
45 const auto end{program.blocks.end()};
46 const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }};
47 program.blocks.erase(std::remove_if(begin, end, pred), end);
48}
49
50void CollectInterpolationInfo(Environment& env, IR::Program& program) {
51 if (program.stage != Stage::Fragment) {
52 return;
53 }
54 const ProgramHeader& sph{env.SPH()};
55 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
56 std::optional<PixelImap> imap;
57 for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) {
58 if (value == PixelImap::Unused) {
59 continue;
60 }
61 if (imap && imap != value) {
62 throw NotImplementedException("Per component interpolation");
63 }
64 imap = value;
65 }
66 if (!imap) {
67 continue;
68 }
69 program.info.interpolation[index] = [&] {
70 switch (*imap) {
71 case PixelImap::Unused:
72 case PixelImap::Perspective:
73 return Interpolation::Smooth;
74 case PixelImap::Constant:
75 return Interpolation::Flat;
76 case PixelImap::ScreenLinear:
77 return Interpolation::NoPerspective;
78 }
79 throw NotImplementedException("Unknown interpolation {}", *imap);
80 }();
81 }
82}
83
84void AddNVNStorageBuffers(IR::Program& program) {
85 if (!program.info.uses_global_memory) {
86 return;
87 }
88 const u32 driver_cbuf{0};
89 const u32 descriptor_size{0x10};
90 const u32 num_buffers{16};
91 const u32 base{[&] {
92 switch (program.stage) {
93 case Stage::VertexA:
94 case Stage::VertexB:
95 return 0x110u;
96 case Stage::TessellationControl:
97 return 0x210u;
98 case Stage::TessellationEval:
99 return 0x310u;
100 case Stage::Geometry:
101 return 0x410u;
102 case Stage::Fragment:
103 return 0x510u;
104 case Stage::Compute:
105 return 0x310u;
106 }
107 throw InvalidArgument("Invalid stage {}", program.stage);
108 }()};
109 auto& descs{program.info.storage_buffers_descriptors};
110 for (u32 index = 0; index < num_buffers; ++index) {
111 if (!program.info.nvn_buffer_used[index]) {
112 continue;
113 }
114 const u32 offset{base + index * descriptor_size};
115 const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
116 if (it != descs.end()) {
117 it->is_written |= program.info.stores_global_memory;
118 continue;
119 }
120 descs.push_back({
121 .cbuf_index = driver_cbuf,
122 .cbuf_offset = offset,
123 .count = 1,
124 .is_written = program.info.stores_global_memory,
125 });
126 }
127}
128} // Anonymous namespace
129
130IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
131 Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
132 IR::Program program;
133 program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
134 program.blocks = GenerateBlocks(program.syntax_list);
135 program.post_order_blocks = PostOrder(program.syntax_list.front());
136 program.stage = env.ShaderStage();
137 program.local_memory_size = env.LocalMemorySize();
138 switch (program.stage) {
139 case Stage::TessellationControl: {
140 const ProgramHeader& sph{env.SPH()};
141 program.invocations = sph.common2.threads_per_input_primitive;
142 break;
143 }
144 case Stage::Geometry: {
145 const ProgramHeader& sph{env.SPH()};
146 program.output_topology = sph.common3.output_topology;
147 program.output_vertices = sph.common4.max_output_vertices;
148 program.invocations = sph.common2.threads_per_input_primitive;
149 program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
150 if (program.is_geometry_passthrough) {
151 const auto& mask{env.GpPassthroughMask()};
152 for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
153 program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
154 }
155 }
156 break;
157 }
158 case Stage::Compute:
159 program.workgroup_size = env.WorkgroupSize();
160 program.shared_memory_size = env.SharedMemorySize();
161 break;
162 default:
163 break;
164 }
165 RemoveUnreachableBlocks(program);
166
167 // Replace instructions before the SSA rewrite
168 if (!host_info.support_float16) {
169 Optimization::LowerFp16ToFp32(program);
170 }
171 if (!host_info.support_int64) {
172 Optimization::LowerInt64ToInt32(program);
173 }
174 Optimization::SsaRewritePass(program);
175
176 Optimization::GlobalMemoryToStorageBufferPass(program);
177 Optimization::TexturePass(env, program);
178
179 Optimization::ConstantPropagationPass(program);
180 Optimization::DeadCodeEliminationPass(program);
181 if (Settings::values.renderer_debug) {
182 Optimization::VerificationPass(program);
183 }
184 Optimization::CollectShaderInfoPass(env, program);
185 CollectInterpolationInfo(env, program);
186 AddNVNStorageBuffers(program);
187 return program;
188}
189
190IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
191 Environment& env_vertex_b) {
192 IR::Program result{};
193 Optimization::VertexATransformPass(vertex_a);
194 Optimization::VertexBTransformPass(vertex_b);
195 for (const auto& term : vertex_a.syntax_list) {
196 if (term.type != IR::AbstractSyntaxNode::Type::Return) {
197 result.syntax_list.push_back(term);
198 }
199 }
200 result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(),
201 vertex_b.syntax_list.end());
202 result.blocks = GenerateBlocks(result.syntax_list);
203 result.post_order_blocks = vertex_b.post_order_blocks;
204 for (const auto& block : vertex_a.post_order_blocks) {
205 result.post_order_blocks.push_back(block);
206 }
207 result.stage = Stage::VertexB;
208 result.info = vertex_a.info;
209 result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
210 result.info.loads.mask |= vertex_b.info.loads.mask;
211 result.info.stores.mask |= vertex_b.info.stores.mask;
212
213 Optimization::JoinTextureInfo(result.info, vertex_b.info);
214 Optimization::JoinStorageInfo(result.info, vertex_b.info);
215 Optimization::DeadCodeEliminationPass(result);
216 if (Settings::values.renderer_debug) {
217 Optimization::VerificationPass(result);
218 }
219 Optimization::CollectShaderInfoPass(env_vertex_b, result);
220 return result;
221}
222
223} // namespace Shader::Maxwell