author    ReinUsesLisp  2021-02-16 20:52:12 -0300
committer ameerj        2021-07-22 21:51:22 -0400
commit    c67d64365a712830fe140dd36e24e2efd9b8a812 (patch)
tree      9287589f2b72d1cbd0cb113c2024b2bc531408c3 /src/video_core/shader/decode.cpp
parent    shader: Add XMAD multiplication folding optimization (diff)
download  yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.gz
          yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.xz
          yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.zip
shader: Remove old shader management
Diffstat (limited to 'src/video_core/shader/decode.cpp')
-rw-r--r--  src/video_core/shader/decode.cpp | 368
1 file changed, 0 insertions(+), 368 deletions(-)
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
deleted file mode 100644
index 6576d1208..000000000
--- a/src/video_core/shader/decode.cpp
+++ /dev/null
@@ -1,368 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <cstring>
#include <limits>
#include <list>
#include <map>
#include <optional>
#include <set>
#include <utility>
#include <variant>
#include <vector>

#include <fmt/format.h>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
#include "video_core/shader/control_flow.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"

namespace VideoCommon::Shader {

using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

namespace {

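// Collects the constant buffer offsets of all bound (non-bindless) samplers so the guest
// driver profile can deduce the texture handler stride from their spacing.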
void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
                              const std::list<SamplerEntry>& used_samplers) {
    if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
        return;
    }
    u32 count{};
    std::vector<u32> bound_offsets;
    for (const auto& sampler : used_samplers) {
        if (sampler.is_bindless) {
            continue;
        }
        ++count;
        bound_offsets.emplace_back(sampler.offset);
    }
    if (count > 1) {
        gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
    }
}

std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
                                        VideoCore::GuestDriverProfile& gpu_driver,
                                        const std::list<SamplerEntry>& used_samplers) {
    const u32 base_offset = sampler_to_deduce.offset;
    u32 max_offset{std::numeric_limits<u32>::max()};
    for (const auto& sampler : used_samplers) {
        if (sampler.is_bindless) {
            continue;
        }
        if (sampler.offset > base_offset) {
            max_offset = std::min(sampler.offset, max_offset);
        }
    }
    if (max_offset == std::numeric_limits<u32>::max()) {
        return std::nullopt;
    }
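    // Offsets are in 4-byte words, so the byte distance from this sampler to the next bound
    // one, divided by the driver's texture handler size, gives the indexed array's length.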
    return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
}

} // Anonymous namespace

class ASTDecoder {
public:
    explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}

    void operator()(ASTProgram& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }

    void operator()(ASTIfThen& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }

    void operator()(ASTIfElse& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }

    void operator()(ASTBlockEncoded& ast) {}

    void operator()(ASTBlockDecoded& ast) {}

    void operator()(ASTVarSet& ast) {}

    void operator()(ASTLabel& ast) {}

    void operator()(ASTGoto& ast) {}

    void operator()(ASTDoWhile& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }

    void operator()(ASTReturn& ast) {}

    void operator()(ASTBreak& ast) {}

    void Visit(ASTNode& node) {
        std::visit(*this, *node->GetInnerData());
        if (node->IsBlockEncoded()) {
            auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData());
            NodeBlock bb = ir.DecodeRange(block->start, block->end);
            node->TransformBlockEncoded(std::move(bb));
        }
    }

private:
    ShaderIR& ir;
};

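// Entry point of decoding: scan the program's control flow, then lower it according to
// the compile depth the scanner managed to achieve.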
void ShaderIR::Decode() {
    std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));

    decompiled = false;
    auto info = ScanFlow(program_code, main_offset, settings, registry);
    auto& shader_info = *info;
    coverage_begin = shader_info.start;
    coverage_end = shader_info.end;
    switch (shader_info.settings.depth) {
    case CompileDepth::FlowStack: {
        for (const auto& block : shader_info.blocks) {
            basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
        }
        break;
    }
    case CompileDepth::NoFlowStack: {
        disable_flow_stack = true;
        const auto insert_block = [this](NodeBlock& nodes, u32 label) {
            if (label == static_cast<u32>(exit_branch)) {
                return;
            }
            basic_blocks.insert({label, nodes});
        };
        const auto& blocks = shader_info.blocks;
        NodeBlock current_block;
        u32 current_label = static_cast<u32>(exit_branch);
        for (const auto& block : blocks) {
            if (shader_info.labels.contains(block.start)) {
                insert_block(current_block, current_label);
                current_block.clear();
                current_label = block.start;
            }
            if (!block.ignore_branch) {
                DecodeRangeInner(current_block, block.start, block.end);
                InsertControlFlow(current_block, block);
            } else {
                DecodeRangeInner(current_block, block.start, block.end + 1);
            }
        }
        insert_block(current_block, current_label);
        break;
    }
    case CompileDepth::DecompileBackwards:
    case CompileDepth::FullDecompile: {
        program_manager = std::move(shader_info.manager);
        disable_flow_stack = true;
        decompiled = true;
        ASTDecoder decoder{*this};
        ASTNode program = GetASTProgram();
        decoder.Visit(program);
        break;
    }
    default:
        LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
        [[fallthrough]];
    case CompileDepth::BruteForce: {
        const auto shader_end = static_cast<u32>(program_code.size());
        coverage_begin = main_offset;
        coverage_end = shader_end;
        for (u32 label = main_offset; label < shader_end; ++label) {
            basic_blocks.insert({label, DecodeRange(label, label + 1)});
        }
        break;
    }
    }
    if (settings.depth != shader_info.settings.depth) {
        LOG_WARNING(HW_GPU,
                    "Decompilation with setting \"{}\" failed; downgraded to setting \"{}\"",
                    CompileDepthAsString(settings.depth),
                    CompileDepthAsString(shader_info.settings.depth));
    }
}

NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
    NodeBlock basic_block;
    DecodeRangeInner(basic_block, begin, end);
    return basic_block;
}

void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
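    // An inverted range (begin past end) falls back to decoding up to the maximum program
    // length instead of stopping immediately.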
    for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
        pc = DecodeInstr(bb, pc);
    }
}

void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
    const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
        Node result = n;
        if (cond.cc != ConditionCode::T) {
            result = Conditional(GetConditionCode(cond.cc), {result});
        }
        if (cond.predicate != Pred::UnusedIndex) {
            u32 pred = static_cast<u32>(cond.predicate);
            const bool is_neg = pred > 7;
            if (is_neg) {
                pred -= 8;
            }
            result = Conditional(GetPredicate(pred, is_neg), {result});
        }
        return result;
    };
    if (std::holds_alternative<SingleBranch>(*block.branch)) {
        auto branch = std::get_if<SingleBranch>(block.branch.get());
        if (branch->address < 0) {
            if (branch->kill) {
                Node n = Operation(OperationCode::Discard);
                n = apply_conditions(branch->condition, n);
                bb.push_back(n);
                global_code.push_back(n);
                return;
            }
            Node n = Operation(OperationCode::Exit);
            n = apply_conditions(branch->condition, n);
            bb.push_back(n);
            global_code.push_back(n);
            return;
        }
        Node n = Operation(OperationCode::Branch, Immediate(branch->address));
        n = apply_conditions(branch->condition, n);
        bb.push_back(n);
        global_code.push_back(n);
        return;
    }
    auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
    Node op_a = GetRegister(multi_branch->gpr);
    for (auto& branch_case : multi_branch->branches) {
        Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
        Node op_b = Immediate(branch_case.cmp_value);
        Node condition =
            GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b);
        auto result = Conditional(condition, {n});
        bb.push_back(result);
        global_code.push_back(result);
    }
}

u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
    // Ignore sched instructions when generating code.
    if (IsSchedInstruction(pc, main_offset)) {
        return pc + 1;
    }

    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const u32 nv_address = ConvertAddressToNvidiaSpace(pc);

    // Decoding failure
    if (!opcode) {
        UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
        bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
                                         nv_address, instr.value)));
        return pc + 1;
    }

    bb.push_back(Comment(
        fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));

    using Tegra::Shader::Pred;
    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
                         "NeverExecute predicate not implemented");
    static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
        {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
        {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
        {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
        {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
        {OpCode::Type::Shift, &ShaderIR::DecodeShift},
        {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
        {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
        {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
        {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
        {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
        {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
        {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
        {OpCode::Type::Image, &ShaderIR::DecodeImage},
        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
        {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
        {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
        {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
        {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
        {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
        {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
        {OpCode::Type::Video, &ShaderIR::DecodeVideo},
        {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
    };

    std::vector<Node> tmp_block;
    if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
        pc = (this->*decoder->second)(tmp_block, pc);
    } else {
        pc = DecodeOther(tmp_block, pc);
    }

    // Some instructions (like SSY) don't have a predicate field; they are always executed
    // unconditionally.
    const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
    const auto pred_index = static_cast<u32>(instr.pred.pred_index);

    if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
        const Node conditional =
            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
        global_code.push_back(conditional);
        bb.push_back(conditional);
    } else {
        for (auto& node : tmp_block) {
            global_code.push_back(node);
            bb.push_back(node);
        }
    }

    return pc + 1;
}

void ShaderIR::PostDecode() {
    // Deduce texture handler size if needed
    auto gpu_driver = registry.AccessGuestDriverProfile();
    DeduceTextureHandlerSize(gpu_driver, used_samplers);
    // Deduce the size of indexed samplers
    if (!uses_indexed_samplers) {
        return;
    }
    for (auto& sampler : used_samplers) {
        if (!sampler.is_indexed) {
            continue;
        }
        if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
            sampler.size = *size;
        } else {
            LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
            sampler.size = 1;
        }
    }
}

} // namespace VideoCommon::Shader
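
The core of the removed DecodeInstr is table-driven dispatch: a static map from an opcode's
type to a pointer-to-member decoder, with DecodeOther as the fallback for anything not in the
table. A minimal, self-contained sketch of that pattern follows; the Decoder class and its
handlers are hypothetical stand-ins for illustration, not yuzu API.

#include <cstdint>
#include <map>

class Decoder {
public:
    // Decode one instruction of the given type, returning the next pc.
    std::uint32_t DecodeInstr(int type, std::uint32_t pc) {
        // Static table mapping an instruction type to its pointer-to-member handler.
        static const std::map<int, std::uint32_t (Decoder::*)(std::uint32_t)> decoders = {
            {0, &Decoder::DecodeArithmetic},
            {1, &Decoder::DecodeMemory},
        };
        if (const auto it = decoders.find(type); it != decoders.end()) {
            return (this->*it->second)(pc); // Invoke the handler through the member pointer
        }
        return DecodeOther(pc); // Fallback for types missing from the table
    }

private:
    std::uint32_t DecodeArithmetic(std::uint32_t pc) { return pc + 1; }
    std::uint32_t DecodeMemory(std::uint32_t pc) { return pc + 1; }
    std::uint32_t DecodeOther(std::uint32_t pc) { return pc + 1; }
};

Keeping the table static and const means it is built once on first call, and adding a new
instruction category only requires one new entry plus its member function.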