summaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/ast.cpp4
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp110
-rw-r--r--src/video_core/shader/const_buffer_locker.h80
-rw-r--r--src/video_core/shader/control_flow.cpp383
-rw-r--r--src/video_core/shader/control_flow.h69
-rw-r--r--src/video_core/shader/decode.cpp41
-rw-r--r--src/video_core/shader/decode/texture.cpp70
-rw-r--r--src/video_core/shader/expr.h21
-rw-r--r--src/video_core/shader/shader_ir.cpp7
-rw-r--r--src/video_core/shader/shader_ir.h24
10 files changed, 638 insertions, 171 deletions
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
index e43aecc18..3f96d9076 100644
--- a/src/video_core/shader/ast.cpp
+++ b/src/video_core/shader/ast.cpp
@@ -228,6 +228,10 @@ public:
228 inner += expr.value ? "true" : "false"; 228 inner += expr.value ? "true" : "false";
229 } 229 }
230 230
/// Prints a register-vs-immediate equality test, e.g. "( gpr_8 == 3)".
void operator()(const ExprGprEqual& expr) {
    inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')';
}
234
231 const std::string& GetResult() const { 235 const std::string& GetResult() const {
232 return inner; 236 return inner;
233 } 237 }
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
new file mode 100644
index 000000000..fe467608e
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -0,0 +1,110 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
// NOTE(review): removed `#pragma once` — it is an include-guard directive and has no
// place in a translation unit (.cpp); it belongs in the matching header only.

#include <algorithm>
#include <memory>
#include <utility>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader/const_buffer_locker.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Engines::SamplerDescriptor;
17
/// Constructs a locker with no engine attached; Obtain* calls can only be served
/// from entries previously added with Insert*, otherwise they return std::nullopt.
ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
    : stage{shader_stage} {}

/// Constructs a locker bound to an engine so that cache misses can be resolved
/// by querying the engine's const buffers and samplers directly.
ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
                                     Tegra::Engines::ConstBufferEngineInterface& engine)
    : stage{shader_stage}, engine{&engine} {}

ConstBufferLocker::~ConstBufferLocker() = default;
26
27std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
28 const std::pair<u32, u32> key = {buffer, offset};
29 const auto iter = keys.find(key);
30 if (iter != keys.end()) {
31 return iter->second;
32 }
33 if (!engine) {
34 return std::nullopt;
35 }
36 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
37 keys.emplace(key, value);
38 return value;
39}
40
41std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
42 const u32 key = offset;
43 const auto iter = bound_samplers.find(key);
44 if (iter != bound_samplers.end()) {
45 return iter->second;
46 }
47 if (!engine) {
48 return std::nullopt;
49 }
50 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
51 bound_samplers.emplace(key, value);
52 return value;
53}
54
55std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
56 u32 buffer, u32 offset) {
57 const std::pair key = {buffer, offset};
58 const auto iter = bindless_samplers.find(key);
59 if (iter != bindless_samplers.end()) {
60 return iter->second;
61 }
62 if (!engine) {
63 return std::nullopt;
64 }
65 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
66 bindless_samplers.emplace(key, value);
67 return value;
68}
69
/// Inserts (or overwrites) a const buffer key. Used when populating the locker
/// from stored data instead of querying the engine.
void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
    keys.insert_or_assign({buffer, offset}, value);
}

/// Inserts (or overwrites) a bound sampler entry keyed by its offset.
void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
    bound_samplers.insert_or_assign(offset, sampler);
}

/// Inserts (or overwrites) a bindless sampler entry keyed by (buffer, offset).
void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
    bindless_samplers.insert_or_assign({buffer, offset}, sampler);
}
81
82bool ConstBufferLocker::IsConsistent() const {
83 if (!engine) {
84 return false;
85 }
86 return std::all_of(keys.begin(), keys.end(),
87 [this](const auto& pair) {
88 const auto [cbuf, offset] = pair.first;
89 const auto value = pair.second;
90 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
91 }) &&
92 std::all_of(bound_samplers.begin(), bound_samplers.end(),
93 [this](const auto& sampler) {
94 const auto [key, value] = sampler;
95 return value == engine->AccessBoundSampler(stage, key);
96 }) &&
97 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
98 [this](const auto& sampler) {
99 const auto [cbuf, offset] = sampler.first;
100 const auto value = sampler.second;
101 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
102 });
103}
104
105bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
106 return keys == rhs.keys && bound_samplers == rhs.bound_samplers &&
107 bindless_samplers == rhs.bindless_samplers;
108}
109
110} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
new file mode 100644
index 000000000..600e2f3c3
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -0,0 +1,80 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8#include "common/common_types.h"
9#include "common/hash.h"
10#include "video_core/engines/const_buffer_engine_interface.h"
11
12namespace VideoCommon::Shader {
13
14using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
15using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
16using BindlessSamplerMap =
17 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
18
/**
 * The ConstBufferLocker is a class used to interface the 3D and compute engines with the shader
 * compiler. With it, the shader can obtain required data from GPU state and store it for disk
 * shader compilation.
 *
 * NOTE(review): this interface uses std::optional but <optional> is not included above —
 * it is presumably pulled in transitively; confirm and include it explicitly.
 **/
class ConstBufferLocker {
public:
    /// Creates a locker with no engine; lookups are served from inserted entries only.
    explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);

    /// Creates a locker that resolves cache misses by querying the given engine.
    explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
                               Tegra::Engines::ConstBufferEngineInterface& engine);

    ~ConstBufferLocker();

    /// Retrieves a key from the locker. If it's registered, it will give the registered value;
    /// if not, it will obtain it from the engine and register it.
    std::optional<u32> ObtainKey(u32 buffer, u32 offset);

    /// Retrieves a bound sampler descriptor, registering it on first access.
    std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);

    /// Retrieves a bindless sampler descriptor, registering it on first access.
    std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);

    /// Inserts a key.
    void InsertKey(u32 buffer, u32 offset, u32 value);

    /// Inserts a bound sampler key.
    void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);

    /// Inserts a bindless sampler key.
    void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);

    /// Checks keys and samplers against the engine's current const buffers. Returns true if they
    /// are the same value, false otherwise.
    bool IsConsistent() const;

    /// Returns true if the keys are equal to the other ones in the locker.
    bool HasEqualKeys(const ConstBufferLocker& rhs) const;

    /// Returns a getter to the const buffer keys in the database.
    const KeyMap& GetKeys() const {
        return keys;
    }

    /// Gets the bound samplers database.
    const BoundSamplerMap& GetBoundSamplers() const {
        return bound_samplers;
    }

    /// Gets the bindless samplers database.
    const BindlessSamplerMap& GetBindlessSamplers() const {
        return bindless_samplers;
    }

private:
    const Tegra::Engines::ShaderType stage;                       // shader stage this locker serves
    Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; // null when no engine is attached
    KeyMap keys;                          // (buffer, offset) -> 32-bit const buffer value
    BoundSamplerMap bound_samplers;       // offset -> sampler descriptor
    BindlessSamplerMap bindless_samplers; // (buffer, offset) -> sampler descriptor
};
79
80} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 9d21f45de..d47c63d9f 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -35,14 +35,20 @@ struct BlockStack {
35 std::stack<u32> pbk_stack{}; 35 std::stack<u32> pbk_stack{};
36}; 36};
37 37
38struct BlockBranchInfo { 38template <typename T, typename... Args>
39 Condition condition{}; 39BlockBranchInfo MakeBranchInfo(Args&&... args) {
40 s32 address{exit_branch}; 40 static_assert(std::is_convertible_v<T, BranchData>);
41 bool kill{}; 41 return std::make_shared<BranchData>(T(std::forward<Args>(args)...));
42 bool is_sync{}; 42}
43 bool is_brk{}; 43
44 bool ignore{}; 44bool BlockBranchIsIgnored(BlockBranchInfo first) {
45}; 45 bool ignore = false;
46 if (std::holds_alternative<SingleBranch>(*first)) {
47 const auto branch = std::get_if<SingleBranch>(first.get());
48 ignore = branch->ignore;
49 }
50 return ignore;
51}
46 52
47struct BlockInfo { 53struct BlockInfo {
48 u32 start{}; 54 u32 start{};
@@ -56,10 +62,11 @@ struct BlockInfo {
56}; 62};
57 63
58struct CFGRebuildState { 64struct CFGRebuildState {
59 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, 65 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
60 const u32 start) 66 : program_code{program_code}, start{start}, locker{locker} {}
61 : start{start}, program_code{program_code}, program_size{program_size} {}
62 67
68 const ProgramCode& program_code;
69 ConstBufferLocker& locker;
63 u32 start{}; 70 u32 start{};
64 std::vector<BlockInfo> block_info{}; 71 std::vector<BlockInfo> block_info{};
65 std::list<u32> inspect_queries{}; 72 std::list<u32> inspect_queries{};
@@ -69,8 +76,6 @@ struct CFGRebuildState {
69 std::map<u32, u32> ssy_labels{}; 76 std::map<u32, u32> ssy_labels{};
70 std::map<u32, u32> pbk_labels{}; 77 std::map<u32, u32> pbk_labels{};
71 std::unordered_map<u32, BlockStack> stacks{}; 78 std::unordered_map<u32, BlockStack> stacks{};
72 const ProgramCode& program_code;
73 const std::size_t program_size;
74 ASTManager* manager; 79 ASTManager* manager;
75}; 80};
76 81
@@ -124,10 +129,116 @@ enum class ParseResult : u32 {
124 AbnormalFlow, 129 AbnormalFlow,
125}; 130};
126 131
/// Result of tracking a BRX (indirect branch) back to the const buffer feeding it.
struct BranchIndirectInfo {
    u32 buffer{};            // const buffer index holding the branch target table
    u32 offset{};            // offset of the table within that const buffer
    u32 entries{};           // number of table entries (derived from the clamping IMNMX)
    s32 relative_position{}; // branch extend added to every table target
};
138
/// Walks backwards from a BRX instruction trying to match the canonical jump-table
/// pattern the compiler emits:
///     IMNMX_IMM (clamp index) -> SHL_IMM (scale) -> LD_C (load target) -> BRX
/// On success returns the const buffer location of the table, its entry count and the
/// BRX branch extend; returns std::nullopt when any link of the chain fails to match.
/// NOTE(review): `start_address` is currently unused — tracking is bounded by
/// `state.start` instead; confirm whether that is intended.
/// NOTE(review): `pos` is unsigned; if `shader_start` were 0, the trailing `pos--`
/// would wrap around instead of terminating a loop — confirm start is always > 0.
std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state,
                                                          u32 start_address, u32 current_position) {
    const u32 shader_start = state.start;
    u32 pos = current_position;
    BranchIndirectInfo result{};
    u64 track_register = 0;

    // Step 0 Get BRX Info
    const Instruction instr = {state.program_code[pos]};
    const auto opcode = OpCode::Decode(instr);
    if (opcode->get().GetId() != OpCode::Id::BRX) {
        return std::nullopt;
    }
    // Only register-relative BRX (no const buffer operand) is supported here.
    if (instr.brx.constant_buffer != 0) {
        return std::nullopt;
    }
    track_register = instr.gpr8.Value();
    result.relative_position = instr.brx.GetBranchExtend();
    pos--;
    bool found_track = false;

    // Step 1 Track LDC: find the LD_C that produced the register BRX consumes,
    // skipping scheduling instructions along the way.
    while (pos >= shader_start) {
        if (IsSchedInstruction(pos, shader_start)) {
            pos--;
            continue;
        }
        const Instruction instr = {state.program_code[pos]};
        const auto opcode = OpCode::Decode(instr);
        if (opcode->get().GetId() == OpCode::Id::LD_C) {
            if (instr.gpr0.Value() == track_register &&
                instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) {
                // Found the table load; remember where the table lives and keep
                // following the address register backwards.
                result.buffer = instr.cbuf36.index.Value();
                result.offset = static_cast<u32>(instr.cbuf36.GetOffset());
                track_register = instr.gpr8.Value();
                pos--;
                found_track = true;
                break;
            }
        }
        pos--;
    }

    if (!found_track) {
        return std::nullopt;
    }
    found_track = false;

    // Step 2 Track SHL: the index register is scaled before the load.
    while (pos >= shader_start) {
        if (IsSchedInstruction(pos, shader_start)) {
            pos--;
            continue;
        }
        const Instruction instr = state.program_code[pos];
        const auto opcode = OpCode::Decode(instr);
        if (opcode->get().GetId() == OpCode::Id::SHL_IMM) {
            if (instr.gpr0.Value() == track_register) {
                track_register = instr.gpr8.Value();
                pos--;
                found_track = true;
                break;
            }
        }
        pos--;
    }

    if (!found_track) {
        return std::nullopt;
    }
    found_track = false;

    // Step 3 Track IMNMX: the clamp bound gives the number of table entries.
    while (pos >= shader_start) {
        if (IsSchedInstruction(pos, shader_start)) {
            pos--;
            continue;
        }
        const Instruction instr = state.program_code[pos];
        const auto opcode = OpCode::Decode(instr);
        if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) {
            if (instr.gpr0.Value() == track_register) {
                track_register = instr.gpr8.Value();
                // Clamp immediate is the highest valid index, hence +1 entries.
                result.entries = instr.alu.GetSignedImm20_20() + 1;
                pos--;
                found_track = true;
                break;
            }
        }
        pos--;
    }

    if (!found_track) {
        return std::nullopt;
    }
    return result;
}
236
127std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { 237std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
128 u32 offset = static_cast<u32>(address); 238 u32 offset = static_cast<u32>(address);
129 const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); 239 const u32 end_address = static_cast<u32>(state.program_code.size());
130 ParseInfo parse_info{}; 240 ParseInfo parse_info{};
241 SingleBranch single_branch{};
131 242
132 const auto insert_label = [](CFGRebuildState& state, u32 address) { 243 const auto insert_label = [](CFGRebuildState& state, u32 address) {
133 const auto pair = state.labels.emplace(address); 244 const auto pair = state.labels.emplace(address);
@@ -140,13 +251,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
140 if (offset >= end_address) { 251 if (offset >= end_address) {
141 // ASSERT_OR_EXECUTE can't be used, as it ignores the break 252 // ASSERT_OR_EXECUTE can't be used, as it ignores the break
142 ASSERT_MSG(false, "Shader passed the current limit!"); 253 ASSERT_MSG(false, "Shader passed the current limit!");
143 parse_info.branch_info.address = exit_branch; 254
144 parse_info.branch_info.ignore = false; 255 single_branch.address = exit_branch;
256 single_branch.ignore = false;
145 break; 257 break;
146 } 258 }
147 if (state.registered.count(offset) != 0) { 259 if (state.registered.count(offset) != 0) {
148 parse_info.branch_info.address = offset; 260 single_branch.address = offset;
149 parse_info.branch_info.ignore = true; 261 single_branch.ignore = true;
150 break; 262 break;
151 } 263 }
152 if (IsSchedInstruction(offset, state.start)) { 264 if (IsSchedInstruction(offset, state.start)) {
@@ -163,24 +275,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
163 switch (opcode->get().GetId()) { 275 switch (opcode->get().GetId()) {
164 case OpCode::Id::EXIT: { 276 case OpCode::Id::EXIT: {
165 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 277 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
166 parse_info.branch_info.condition.predicate = 278 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
167 GetPredicate(pred_index, instr.negate_pred != 0); 279 if (single_branch.condition.predicate == Pred::NeverExecute) {
168 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
169 offset++; 280 offset++;
170 continue; 281 continue;
171 } 282 }
172 const ConditionCode cc = instr.flow_condition_code; 283 const ConditionCode cc = instr.flow_condition_code;
173 parse_info.branch_info.condition.cc = cc; 284 single_branch.condition.cc = cc;
174 if (cc == ConditionCode::F) { 285 if (cc == ConditionCode::F) {
175 offset++; 286 offset++;
176 continue; 287 continue;
177 } 288 }
178 parse_info.branch_info.address = exit_branch; 289 single_branch.address = exit_branch;
179 parse_info.branch_info.kill = false; 290 single_branch.kill = false;
180 parse_info.branch_info.is_sync = false; 291 single_branch.is_sync = false;
181 parse_info.branch_info.is_brk = false; 292 single_branch.is_brk = false;
182 parse_info.branch_info.ignore = false; 293 single_branch.ignore = false;
183 parse_info.end_address = offset; 294 parse_info.end_address = offset;
295 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
296 single_branch.condition, single_branch.address, single_branch.kill,
297 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
184 298
185 return {ParseResult::ControlCaught, parse_info}; 299 return {ParseResult::ControlCaught, parse_info};
186 } 300 }
@@ -189,99 +303,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
189 return {ParseResult::AbnormalFlow, parse_info}; 303 return {ParseResult::AbnormalFlow, parse_info};
190 } 304 }
191 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 305 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
192 parse_info.branch_info.condition.predicate = 306 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
193 GetPredicate(pred_index, instr.negate_pred != 0); 307 if (single_branch.condition.predicate == Pred::NeverExecute) {
194 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
195 offset++; 308 offset++;
196 continue; 309 continue;
197 } 310 }
198 const ConditionCode cc = instr.flow_condition_code; 311 const ConditionCode cc = instr.flow_condition_code;
199 parse_info.branch_info.condition.cc = cc; 312 single_branch.condition.cc = cc;
200 if (cc == ConditionCode::F) { 313 if (cc == ConditionCode::F) {
201 offset++; 314 offset++;
202 continue; 315 continue;
203 } 316 }
204 const u32 branch_offset = offset + instr.bra.GetBranchTarget(); 317 const u32 branch_offset = offset + instr.bra.GetBranchTarget();
205 if (branch_offset == 0) { 318 if (branch_offset == 0) {
206 parse_info.branch_info.address = exit_branch; 319 single_branch.address = exit_branch;
207 } else { 320 } else {
208 parse_info.branch_info.address = branch_offset; 321 single_branch.address = branch_offset;
209 } 322 }
210 insert_label(state, branch_offset); 323 insert_label(state, branch_offset);
211 parse_info.branch_info.kill = false; 324 single_branch.kill = false;
212 parse_info.branch_info.is_sync = false; 325 single_branch.is_sync = false;
213 parse_info.branch_info.is_brk = false; 326 single_branch.is_brk = false;
214 parse_info.branch_info.ignore = false; 327 single_branch.ignore = false;
215 parse_info.end_address = offset; 328 parse_info.end_address = offset;
329 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
330 single_branch.condition, single_branch.address, single_branch.kill,
331 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
216 332
217 return {ParseResult::ControlCaught, parse_info}; 333 return {ParseResult::ControlCaught, parse_info};
218 } 334 }
219 case OpCode::Id::SYNC: { 335 case OpCode::Id::SYNC: {
220 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 336 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
221 parse_info.branch_info.condition.predicate = 337 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
222 GetPredicate(pred_index, instr.negate_pred != 0); 338 if (single_branch.condition.predicate == Pred::NeverExecute) {
223 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
224 offset++; 339 offset++;
225 continue; 340 continue;
226 } 341 }
227 const ConditionCode cc = instr.flow_condition_code; 342 const ConditionCode cc = instr.flow_condition_code;
228 parse_info.branch_info.condition.cc = cc; 343 single_branch.condition.cc = cc;
229 if (cc == ConditionCode::F) { 344 if (cc == ConditionCode::F) {
230 offset++; 345 offset++;
231 continue; 346 continue;
232 } 347 }
233 parse_info.branch_info.address = unassigned_branch; 348 single_branch.address = unassigned_branch;
234 parse_info.branch_info.kill = false; 349 single_branch.kill = false;
235 parse_info.branch_info.is_sync = true; 350 single_branch.is_sync = true;
236 parse_info.branch_info.is_brk = false; 351 single_branch.is_brk = false;
237 parse_info.branch_info.ignore = false; 352 single_branch.ignore = false;
238 parse_info.end_address = offset; 353 parse_info.end_address = offset;
354 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
355 single_branch.condition, single_branch.address, single_branch.kill,
356 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
239 357
240 return {ParseResult::ControlCaught, parse_info}; 358 return {ParseResult::ControlCaught, parse_info};
241 } 359 }
242 case OpCode::Id::BRK: { 360 case OpCode::Id::BRK: {
243 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 361 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
244 parse_info.branch_info.condition.predicate = 362 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
245 GetPredicate(pred_index, instr.negate_pred != 0); 363 if (single_branch.condition.predicate == Pred::NeverExecute) {
246 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
247 offset++; 364 offset++;
248 continue; 365 continue;
249 } 366 }
250 const ConditionCode cc = instr.flow_condition_code; 367 const ConditionCode cc = instr.flow_condition_code;
251 parse_info.branch_info.condition.cc = cc; 368 single_branch.condition.cc = cc;
252 if (cc == ConditionCode::F) { 369 if (cc == ConditionCode::F) {
253 offset++; 370 offset++;
254 continue; 371 continue;
255 } 372 }
256 parse_info.branch_info.address = unassigned_branch; 373 single_branch.address = unassigned_branch;
257 parse_info.branch_info.kill = false; 374 single_branch.kill = false;
258 parse_info.branch_info.is_sync = false; 375 single_branch.is_sync = false;
259 parse_info.branch_info.is_brk = true; 376 single_branch.is_brk = true;
260 parse_info.branch_info.ignore = false; 377 single_branch.ignore = false;
261 parse_info.end_address = offset; 378 parse_info.end_address = offset;
379 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
380 single_branch.condition, single_branch.address, single_branch.kill,
381 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
262 382
263 return {ParseResult::ControlCaught, parse_info}; 383 return {ParseResult::ControlCaught, parse_info};
264 } 384 }
265 case OpCode::Id::KIL: { 385 case OpCode::Id::KIL: {
266 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 386 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
267 parse_info.branch_info.condition.predicate = 387 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
268 GetPredicate(pred_index, instr.negate_pred != 0); 388 if (single_branch.condition.predicate == Pred::NeverExecute) {
269 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
270 offset++; 389 offset++;
271 continue; 390 continue;
272 } 391 }
273 const ConditionCode cc = instr.flow_condition_code; 392 const ConditionCode cc = instr.flow_condition_code;
274 parse_info.branch_info.condition.cc = cc; 393 single_branch.condition.cc = cc;
275 if (cc == ConditionCode::F) { 394 if (cc == ConditionCode::F) {
276 offset++; 395 offset++;
277 continue; 396 continue;
278 } 397 }
279 parse_info.branch_info.address = exit_branch; 398 single_branch.address = exit_branch;
280 parse_info.branch_info.kill = true; 399 single_branch.kill = true;
281 parse_info.branch_info.is_sync = false; 400 single_branch.is_sync = false;
282 parse_info.branch_info.is_brk = false; 401 single_branch.is_brk = false;
283 parse_info.branch_info.ignore = false; 402 single_branch.ignore = false;
284 parse_info.end_address = offset; 403 parse_info.end_address = offset;
404 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
405 single_branch.condition, single_branch.address, single_branch.kill,
406 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
285 407
286 return {ParseResult::ControlCaught, parse_info}; 408 return {ParseResult::ControlCaught, parse_info};
287 } 409 }
@@ -298,6 +420,29 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
298 break; 420 break;
299 } 421 }
300 case OpCode::Id::BRX: { 422 case OpCode::Id::BRX: {
423 auto tmp = TrackBranchIndirectInfo(state, address, offset);
424 if (tmp) {
425 auto result = *tmp;
426 std::vector<CaseBranch> branches{};
427 s32 pc_target = offset + result.relative_position;
428 for (u32 i = 0; i < result.entries; i++) {
429 auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
430 if (!k) {
431 return {ParseResult::AbnormalFlow, parse_info};
432 }
433 u32 value = *k;
434 u32 target = static_cast<u32>((value >> 3) + pc_target);
435 insert_label(state, target);
436 branches.emplace_back(value, target);
437 }
438 parse_info.end_address = offset;
439 parse_info.branch_info = MakeBranchInfo<MultiBranch>(
440 static_cast<u32>(instr.gpr8.Value()), std::move(branches));
441
442 return {ParseResult::ControlCaught, parse_info};
443 } else {
444 LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
445 }
301 return {ParseResult::AbnormalFlow, parse_info}; 446 return {ParseResult::AbnormalFlow, parse_info};
302 } 447 }
303 default: 448 default:
@@ -306,10 +451,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
306 451
307 offset++; 452 offset++;
308 } 453 }
309 parse_info.branch_info.kill = false; 454 single_branch.kill = false;
310 parse_info.branch_info.is_sync = false; 455 single_branch.is_sync = false;
311 parse_info.branch_info.is_brk = false; 456 single_branch.is_brk = false;
312 parse_info.end_address = offset - 1; 457 parse_info.end_address = offset - 1;
458 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
459 single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
460 single_branch.is_brk, single_branch.ignore);
313 return {ParseResult::BlockEnd, parse_info}; 461 return {ParseResult::BlockEnd, parse_info};
314} 462}
315 463
@@ -333,9 +481,10 @@ bool TryInspectAddress(CFGRebuildState& state) {
333 BlockInfo& current_block = state.block_info[block_index]; 481 BlockInfo& current_block = state.block_info[block_index];
334 current_block.end = address - 1; 482 current_block.end = address - 1;
335 new_block.branch = current_block.branch; 483 new_block.branch = current_block.branch;
336 BlockBranchInfo forward_branch{}; 484 BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
337 forward_branch.address = address; 485 const auto branch = std::get_if<SingleBranch>(forward_branch.get());
338 forward_branch.ignore = true; 486 branch->address = address;
487 branch->ignore = true;
339 current_block.branch = forward_branch; 488 current_block.branch = forward_branch;
340 return true; 489 return true;
341 } 490 }
@@ -350,12 +499,15 @@ bool TryInspectAddress(CFGRebuildState& state) {
350 499
351 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); 500 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
352 block_info.branch = parse_info.branch_info; 501 block_info.branch = parse_info.branch_info;
353 if (parse_info.branch_info.condition.IsUnconditional()) { 502 if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
503 const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
504 if (branch->condition.IsUnconditional()) {
505 return true;
506 }
507 const u32 fallthrough_address = parse_info.end_address + 1;
508 state.inspect_queries.push_front(fallthrough_address);
354 return true; 509 return true;
355 } 510 }
356
357 const u32 fallthrough_address = parse_info.end_address + 1;
358 state.inspect_queries.push_front(fallthrough_address);
359 return true; 511 return true;
360} 512}
361 513
@@ -393,31 +545,42 @@ bool TryQuery(CFGRebuildState& state) {
393 state.queries.pop_front(); 545 state.queries.pop_front();
394 gather_labels(q2.ssy_stack, state.ssy_labels, block); 546 gather_labels(q2.ssy_stack, state.ssy_labels, block);
395 gather_labels(q2.pbk_stack, state.pbk_labels, block); 547 gather_labels(q2.pbk_stack, state.pbk_labels, block);
396 if (!block.branch.condition.IsUnconditional()) { 548 if (std::holds_alternative<SingleBranch>(*block.branch)) {
397 q2.address = block.end + 1; 549 const auto branch = std::get_if<SingleBranch>(block.branch.get());
398 state.queries.push_back(q2); 550 if (!branch->condition.IsUnconditional()) {
399 } 551 q2.address = block.end + 1;
552 state.queries.push_back(q2);
553 }
400 554
401 Query conditional_query{q2}; 555 Query conditional_query{q2};
402 if (block.branch.is_sync) { 556 if (branch->is_sync) {
403 if (block.branch.address == unassigned_branch) { 557 if (branch->address == unassigned_branch) {
404 block.branch.address = conditional_query.ssy_stack.top(); 558 branch->address = conditional_query.ssy_stack.top();
559 }
560 conditional_query.ssy_stack.pop();
405 } 561 }
406 conditional_query.ssy_stack.pop(); 562 if (branch->is_brk) {
407 } 563 if (branch->address == unassigned_branch) {
408 if (block.branch.is_brk) { 564 branch->address = conditional_query.pbk_stack.top();
409 if (block.branch.address == unassigned_branch) { 565 }
410 block.branch.address = conditional_query.pbk_stack.top(); 566 conditional_query.pbk_stack.pop();
411 } 567 }
412 conditional_query.pbk_stack.pop(); 568 conditional_query.address = branch->address;
569 state.queries.push_back(std::move(conditional_query));
570 return true;
571 }
572 const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
573 for (const auto& branch_case : multi_branch->branches) {
574 Query conditional_query{q2};
575 conditional_query.address = branch_case.address;
576 state.queries.push_back(std::move(conditional_query));
413 } 577 }
414 conditional_query.address = block.branch.address;
415 state.queries.push_back(std::move(conditional_query));
416 return true; 578 return true;
417} 579}
580
418} // Anonymous namespace 581} // Anonymous namespace
419 582
420void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { 583void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
421 const auto get_expr = ([&](const Condition& cond) -> Expr { 584 const auto get_expr = ([&](const Condition& cond) -> Expr {
422 Expr result{}; 585 Expr result{};
423 if (cond.cc != ConditionCode::T) { 586 if (cond.cc != ConditionCode::T) {
@@ -444,15 +607,24 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) {
444 } 607 }
445 return MakeExpr<ExprBoolean>(true); 608 return MakeExpr<ExprBoolean>(true);
446 }); 609 });
447 if (branch.address < 0) { 610 if (std::holds_alternative<SingleBranch>(*branch_info)) {
448 if (branch.kill) { 611 const auto branch = std::get_if<SingleBranch>(branch_info.get());
449 mm.InsertReturn(get_expr(branch.condition), true); 612 if (branch->address < 0) {
613 if (branch->kill) {
614 mm.InsertReturn(get_expr(branch->condition), true);
615 return;
616 }
617 mm.InsertReturn(get_expr(branch->condition), false);
450 return; 618 return;
451 } 619 }
452 mm.InsertReturn(get_expr(branch.condition), false); 620 mm.InsertGoto(get_expr(branch->condition), branch->address);
453 return; 621 return;
454 } 622 }
455 mm.InsertGoto(get_expr(branch.condition), branch.address); 623 const auto multi_branch = std::get_if<MultiBranch>(branch_info.get());
624 for (const auto& branch_case : multi_branch->branches) {
625 mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
626 branch_case.address);
627 }
456} 628}
457 629
458void DecompileShader(CFGRebuildState& state) { 630void DecompileShader(CFGRebuildState& state) {
@@ -464,25 +636,26 @@ void DecompileShader(CFGRebuildState& state) {
464 if (state.labels.count(block.start) != 0) { 636 if (state.labels.count(block.start) != 0) {
465 state.manager->InsertLabel(block.start); 637 state.manager->InsertLabel(block.start);
466 } 638 }
467 u32 end = block.branch.ignore ? block.end + 1 : block.end; 639 const bool ignore = BlockBranchIsIgnored(block.branch);
640 u32 end = ignore ? block.end + 1 : block.end;
468 state.manager->InsertBlock(block.start, end); 641 state.manager->InsertBlock(block.start, end);
469 if (!block.branch.ignore) { 642 if (!ignore) {
470 InsertBranch(*state.manager, block.branch); 643 InsertBranch(*state.manager, block.branch);
471 } 644 }
472 } 645 }
473 state.manager->Decompile(); 646 state.manager->Decompile();
474} 647}
475 648
476std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, 649std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
477 std::size_t program_size, u32 start_address, 650 const CompilerSettings& settings,
478 const CompilerSettings& settings) { 651 ConstBufferLocker& locker) {
479 auto result_out = std::make_unique<ShaderCharacteristics>(); 652 auto result_out = std::make_unique<ShaderCharacteristics>();
480 if (settings.depth == CompileDepth::BruteForce) { 653 if (settings.depth == CompileDepth::BruteForce) {
481 result_out->settings.depth = CompileDepth::BruteForce; 654 result_out->settings.depth = CompileDepth::BruteForce;
482 return result_out; 655 return result_out;
483 } 656 }
484 657
485 CFGRebuildState state{program_code, program_size, start_address}; 658 CFGRebuildState state{program_code, start_address, locker};
486 // Inspect Code and generate blocks 659 // Inspect Code and generate blocks
487 state.labels.clear(); 660 state.labels.clear();
488 state.labels.emplace(start_address); 661 state.labels.emplace(start_address);
@@ -547,11 +720,9 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
547 ShaderBlock new_block{}; 720 ShaderBlock new_block{};
548 new_block.start = block.start; 721 new_block.start = block.start;
549 new_block.end = block.end; 722 new_block.end = block.end;
550 new_block.ignore_branch = block.branch.ignore; 723 new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
551 if (!new_block.ignore_branch) { 724 if (!new_block.ignore_branch) {
552 new_block.branch.cond = block.branch.condition; 725 new_block.branch = block.branch;
553 new_block.branch.kills = block.branch.kill;
554 new_block.branch.address = block.branch.address;
555 } 726 }
556 result_out->end = std::max(result_out->end, block.end); 727 result_out->end = std::max(result_out->end, block.end);
557 result_out->blocks.push_back(new_block); 728 result_out->blocks.push_back(new_block);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 37e987d62..5304998b9 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -7,6 +7,7 @@
7#include <list> 7#include <list>
8#include <optional> 8#include <optional>
9#include <set> 9#include <set>
10#include <variant>
10 11
11#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/ast.h" 13#include "video_core/shader/ast.h"
@@ -37,29 +38,61 @@ struct Condition {
37 } 38 }
38}; 39};
39 40
40struct ShaderBlock { 41class SingleBranch {
41 struct Branch { 42public:
42 Condition cond{}; 43 SingleBranch() = default;
43 bool kills{}; 44 SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk,
44 s32 address{}; 45 bool ignore)
46 : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk},
47 ignore{ignore} {}
48
49 bool operator==(const SingleBranch& b) const {
50 return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
51 std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
52 }
53
54 bool operator!=(const SingleBranch& b) const {
55 return !operator==(b);
56 }
57
58 Condition condition{};
59 s32 address{exit_branch};
60 bool kill{};
61 bool is_sync{};
62 bool is_brk{};
63 bool ignore{};
64};
45 65
46 bool operator==(const Branch& b) const { 66struct CaseBranch {
47 return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); 67 CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {}
48 } 68 u32 cmp_value;
69 u32 address;
70};
71
72class MultiBranch {
73public:
74 MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches)
75 : gpr{gpr}, branches{std::move(branches)} {}
76
77 u32 gpr{};
78 std::vector<CaseBranch> branches{};
79};
80
81using BranchData = std::variant<SingleBranch, MultiBranch>;
82using BlockBranchInfo = std::shared_ptr<BranchData>;
49 83
50 bool operator!=(const Branch& b) const { 84bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
51 return !operator==(b);
52 }
53 };
54 85
86struct ShaderBlock {
55 u32 start{}; 87 u32 start{};
56 u32 end{}; 88 u32 end{};
57 bool ignore_branch{}; 89 bool ignore_branch{};
58 Branch branch{}; 90 BlockBranchInfo branch{};
59 91
60 bool operator==(const ShaderBlock& sb) const { 92 bool operator==(const ShaderBlock& sb) const {
61 return std::tie(start, end, ignore_branch, branch) == 93 return std::tie(start, end, ignore_branch) ==
62 std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); 94 std::tie(sb.start, sb.end, sb.ignore_branch) &&
95 BlockBranchInfoAreEqual(branch, sb.branch);
63 } 96 }
64 97
65 bool operator!=(const ShaderBlock& sb) const { 98 bool operator!=(const ShaderBlock& sb) const {
@@ -76,8 +109,8 @@ struct ShaderCharacteristics {
76 CompilerSettings settings{}; 109 CompilerSettings settings{};
77}; 110};
78 111
79std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, 112std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
80 std::size_t program_size, u32 start_address, 113 const CompilerSettings& settings,
81 const CompilerSettings& settings); 114 ConstBufferLocker& locker);
82 115
83} // namespace VideoCommon::Shader 116} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2626b1616..21fb9cb83 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
33 return (absolute_offset % SchedPeriod) == 0; 33 return (absolute_offset % SchedPeriod) == 0;
34} 34}
35 35
36} // namespace 36} // Anonymous namespace
37 37
38class ASTDecoder { 38class ASTDecoder {
39public: 39public:
@@ -102,7 +102,7 @@ void ShaderIR::Decode() {
102 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 102 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
103 103
104 decompiled = false; 104 decompiled = false;
105 auto info = ScanFlow(program_code, program_size, main_offset, settings); 105 auto info = ScanFlow(program_code, main_offset, settings, locker);
106 auto& shader_info = *info; 106 auto& shader_info = *info;
107 coverage_begin = shader_info.start; 107 coverage_begin = shader_info.start;
108 coverage_end = shader_info.end; 108 coverage_end = shader_info.end;
@@ -155,7 +155,7 @@ void ShaderIR::Decode() {
155 [[fallthrough]]; 155 [[fallthrough]];
156 case CompileDepth::BruteForce: { 156 case CompileDepth::BruteForce: {
157 coverage_begin = main_offset; 157 coverage_begin = main_offset;
158 const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); 158 const std::size_t shader_end = program_code.size();
159 coverage_end = shader_end; 159 coverage_end = shader_end;
160 for (u32 label = main_offset; label < shader_end; label++) { 160 for (u32 label = main_offset; label < shader_end; label++) {
161 basic_blocks.insert({label, DecodeRange(label, label + 1)}); 161 basic_blocks.insert({label, DecodeRange(label, label + 1)});
@@ -198,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
198 } 198 }
199 return result; 199 return result;
200 }; 200 };
201 if (block.branch.address < 0) { 201 if (std::holds_alternative<SingleBranch>(*block.branch)) {
202 if (block.branch.kills) { 202 auto branch = std::get_if<SingleBranch>(block.branch.get());
203 Node n = Operation(OperationCode::Discard); 203 if (branch->address < 0) {
204 n = apply_conditions(block.branch.cond, n); 204 if (branch->kill) {
205 Node n = Operation(OperationCode::Discard);
206 n = apply_conditions(branch->condition, n);
207 bb.push_back(n);
208 global_code.push_back(n);
209 return;
210 }
211 Node n = Operation(OperationCode::Exit);
212 n = apply_conditions(branch->condition, n);
205 bb.push_back(n); 213 bb.push_back(n);
206 global_code.push_back(n); 214 global_code.push_back(n);
207 return; 215 return;
208 } 216 }
209 Node n = Operation(OperationCode::Exit); 217 Node n = Operation(OperationCode::Branch, Immediate(branch->address));
210 n = apply_conditions(block.branch.cond, n); 218 n = apply_conditions(branch->condition, n);
211 bb.push_back(n); 219 bb.push_back(n);
212 global_code.push_back(n); 220 global_code.push_back(n);
213 return; 221 return;
214 } 222 }
215 Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); 223 auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
216 n = apply_conditions(block.branch.cond, n); 224 Node op_a = GetRegister(multi_branch->gpr);
217 bb.push_back(n); 225 for (auto& branch_case : multi_branch->branches) {
218 global_code.push_back(n); 226 Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
227 Node op_b = Immediate(branch_case.cmp_value);
228 Node condition =
229 GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
230 auto result = Conditional(condition, {n});
231 bb.push_back(result);
232 global_code.push_back(result);
233 }
219} 234}
220 235
221u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { 236u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 295445498..d61e656b7 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
141 const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); 141 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
142 142
143 const auto& sampler = 143 const auto& sampler =
144 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); 144 GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}});
145 145
146 Node4 values; 146 Node4 values;
147 for (u32 element = 0; element < values.size(); ++element) { 147 for (u32 element = 0; element < values.size(); ++element) {
@@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
165 // Sadly, not all texture instructions specify the type of texture their sampler 165 // Sadly, not all texture instructions specify the type of texture their sampler
166 // uses. This must be fixed at a later instance. 166 // uses. This must be fixed at a later instance.
167 const auto& sampler = 167 const auto& sampler =
168 is_bindless 168 is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {});
169 ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
170 false)
171 : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
172 169
173 u32 indexer = 0; 170 u32 indexer = 0;
174 switch (instr.txq.query_type) { 171 switch (instr.txq.query_type) {
@@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
207 204
208 auto texture_type = instr.tmml.texture_type.Value(); 205 auto texture_type = instr.tmml.texture_type.Value();
209 const bool is_array = instr.tmml.array != 0; 206 const bool is_array = instr.tmml.array != 0;
210 const auto& sampler = is_bindless 207 const auto& sampler =
211 ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) 208 is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}})
212 : GetSampler(instr.sampler, texture_type, is_array, false); 209 : GetSampler(instr.sampler, {{texture_type, is_array, false}});
213 210
214 std::vector<Node> coords; 211 std::vector<Node> coords;
215 212
@@ -285,9 +282,26 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
285 return pc; 282 return pc;
286} 283}
287 284
288const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, 285const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
289 bool is_array, bool is_shadow) { 286 std::optional<SamplerInfo> sampler_info) {
290 const auto offset = static_cast<std::size_t>(sampler.index.Value()); 287 const auto offset = static_cast<u32>(sampler.index.Value());
288
289 Tegra::Shader::TextureType type;
290 bool is_array;
291 bool is_shadow;
292 if (sampler_info) {
293 type = sampler_info->type;
294 is_array = sampler_info->is_array;
295 is_shadow = sampler_info->is_shadow;
296 } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) {
297 type = sampler->texture_type.Value();
298 is_array = sampler->is_array.Value() != 0;
299 is_shadow = sampler->is_shadow.Value() != 0;
300 } else {
301 type = Tegra::Shader::TextureType::Texture2D;
302 is_array = false;
303 is_shadow = false;
304 }
291 305
292 // If this sampler has already been used, return the existing mapping. 306 // If this sampler has already been used, return the existing mapping.
293 const auto itr = 307 const auto itr =
@@ -303,15 +317,31 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
303 const std::size_t next_index = used_samplers.size(); 317 const std::size_t next_index = used_samplers.size();
304 const Sampler entry{offset, next_index, type, is_array, is_shadow}; 318 const Sampler entry{offset, next_index, type, is_array, is_shadow};
305 return *used_samplers.emplace(entry).first; 319 return *used_samplers.emplace(entry).first;
306} 320}
307 321
308const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, 322const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
309 bool is_array, bool is_shadow) { 323 std::optional<SamplerInfo> sampler_info) {
310 const Node sampler_register = GetRegister(reg); 324 const Node sampler_register = GetRegister(reg);
311 const auto [base_sampler, cbuf_index, cbuf_offset] = 325 const auto [base_sampler, cbuf_index, cbuf_offset] =
312 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); 326 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
313 ASSERT(base_sampler != nullptr); 327 ASSERT(base_sampler != nullptr);
314 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); 328 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
329 Tegra::Shader::TextureType type;
330 bool is_array;
331 bool is_shadow;
332 if (sampler_info) {
333 type = sampler_info->type;
334 is_array = sampler_info->is_array;
335 is_shadow = sampler_info->is_shadow;
336 } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) {
337 type = sampler->texture_type.Value();
338 is_array = sampler->is_array.Value() != 0;
339 is_shadow = sampler->is_shadow.Value() != 0;
340 } else {
341 type = Tegra::Shader::TextureType::Texture2D;
342 is_array = false;
343 is_shadow = false;
344 }
315 345
316 // If this sampler has already been used, return the existing mapping. 346 // If this sampler has already been used, return the existing mapping.
317 const auto itr = 347 const auto itr =
@@ -411,9 +441,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
411 (texture_type == TextureType::TextureCube && is_array && is_shadow), 441 (texture_type == TextureType::TextureCube && is_array && is_shadow),
412 "This method is not supported."); 442 "This method is not supported.");
413 443
414 const auto& sampler = is_bindless 444 const auto& sampler =
415 ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) 445 is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}})
416 : GetSampler(instr.sampler, texture_type, is_array, is_shadow); 446 : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}});
417 447
418 const bool lod_needed = process_mode == TextureProcessMode::LZ || 448 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
419 process_mode == TextureProcessMode::LL || 449 process_mode == TextureProcessMode::LL ||
@@ -577,7 +607,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
577 dc = GetRegister(parameter_register++); 607 dc = GetRegister(parameter_register++);
578 } 608 }
579 609
580 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); 610 const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
581 611
582 Node4 values; 612 Node4 values;
583 for (u32 element = 0; element < values.size(); ++element) { 613 for (u32 element = 0; element < values.size(); ++element) {
@@ -610,7 +640,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
610 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; 640 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
611 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; 641 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
612 642
613 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); 643 const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
614 644
615 Node4 values; 645 Node4 values;
616 for (u32 element = 0; element < values.size(); ++element) { 646 for (u32 element = 0; element < values.size(); ++element) {
@@ -646,7 +676,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
646 // When lod is used always is in gpr20 676 // When lod is used always is in gpr20
647 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); 677 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
648 678
649 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); 679 const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
650 680
651 Node4 values; 681 Node4 values;
652 for (u32 element = 0; element < values.size(); ++element) { 682 for (u32 element = 0; element < values.size(); ++element) {
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
index d3dcd00ec..4e8264367 100644
--- a/src/video_core/shader/expr.h
+++ b/src/video_core/shader/expr.h
@@ -17,13 +17,14 @@ using Tegra::Shader::Pred;
17class ExprAnd; 17class ExprAnd;
18class ExprBoolean; 18class ExprBoolean;
19class ExprCondCode; 19class ExprCondCode;
20class ExprGprEqual;
20class ExprNot; 21class ExprNot;
21class ExprOr; 22class ExprOr;
22class ExprPredicate; 23class ExprPredicate;
23class ExprVar; 24class ExprVar;
24 25
25using ExprData = 26using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
26 std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, ExprBoolean>; 27 ExprBoolean, ExprGprEqual>;
27using Expr = std::shared_ptr<ExprData>; 28using Expr = std::shared_ptr<ExprData>;
28 29
29class ExprAnd final { 30class ExprAnd final {
@@ -118,6 +119,22 @@ public:
118 bool value; 119 bool value;
119}; 120};
120 121
122class ExprGprEqual final {
123public:
124 ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {}
125
126 bool operator==(const ExprGprEqual& b) const {
127 return gpr == b.gpr && value == b.value;
128 }
129
130 bool operator!=(const ExprGprEqual& b) const {
131 return !operator==(b);
132 }
133
134 u32 gpr;
135 u32 value;
136};
137
121template <typename T, typename... Args> 138template <typename T, typename... Args>
122Expr MakeExpr(Args&&... args) { 139Expr MakeExpr(Args&&... args) {
123 static_assert(std::is_convertible_v<T, ExprData>); 140 static_assert(std::is_convertible_v<T, ExprData>);
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index b10d376cb..1d9825c76 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -23,10 +23,9 @@ using Tegra::Shader::PredCondition;
23using Tegra::Shader::PredOperation; 23using Tegra::Shader::PredOperation;
24using Tegra::Shader::Register; 24using Tegra::Shader::Register;
25 25
26ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, 26ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
27 CompilerSettings settings) 27 ConstBufferLocker& locker)
28 : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, 28 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
29 program_manager{true, true}, settings{settings} {
30 Decode(); 29 Decode();
31} 30}
32 31
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 02ddf2a75..1fd44bde1 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -17,6 +17,7 @@
17#include "video_core/engines/shader_header.h" 17#include "video_core/engines/shader_header.h"
18#include "video_core/shader/ast.h" 18#include "video_core/shader/ast.h"
19#include "video_core/shader/compiler_settings.h" 19#include "video_core/shader/compiler_settings.h"
20#include "video_core/shader/const_buffer_locker.h"
20#include "video_core/shader/node.h" 21#include "video_core/shader/node.h"
21 22
22namespace VideoCommon::Shader { 23namespace VideoCommon::Shader {
@@ -66,8 +67,8 @@ struct GlobalMemoryUsage {
66 67
67class ShaderIR final { 68class ShaderIR final {
68public: 69public:
69 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, 70 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
70 CompilerSettings settings); 71 ConstBufferLocker& locker);
71 ~ShaderIR(); 72 ~ShaderIR();
72 73
73 const std::map<u32, NodeBlock>& GetBasicBlocks() const { 74 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -172,6 +173,13 @@ public:
172 173
173private: 174private:
174 friend class ASTDecoder; 175 friend class ASTDecoder;
176
177 struct SamplerInfo {
178 Tegra::Shader::TextureType type;
179 bool is_array;
180 bool is_shadow;
181 };
182
175 void Decode(); 183 void Decode();
176 184
177 NodeBlock DecodeRange(u32 begin, u32 end); 185 NodeBlock DecodeRange(u32 begin, u32 end);
@@ -296,12 +304,11 @@ private:
296 304
297 /// Accesses a texture sampler 305 /// Accesses a texture sampler
298 const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, 306 const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
299 Tegra::Shader::TextureType type, bool is_array, bool is_shadow); 307 std::optional<SamplerInfo> sampler_info);
300 308
301 // Accesses a texture sampler for a bindless texture. 309 // Accesses a texture sampler for a bindless texture.
302 const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, 310 const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
303 Tegra::Shader::TextureType type, bool is_array, 311 std::optional<SamplerInfo> sampler_info);
304 bool is_shadow);
305 312
306 /// Accesses an image. 313 /// Accesses an image.
307 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); 314 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
@@ -377,7 +384,9 @@ private:
377 384
378 const ProgramCode& program_code; 385 const ProgramCode& program_code;
379 const u32 main_offset; 386 const u32 main_offset;
380 const std::size_t program_size; 387 const CompilerSettings settings;
388 ConstBufferLocker& locker;
389
381 bool decompiled{}; 390 bool decompiled{};
382 bool disable_flow_stack{}; 391 bool disable_flow_stack{};
383 392
@@ -386,8 +395,7 @@ private:
386 395
387 std::map<u32, NodeBlock> basic_blocks; 396 std::map<u32, NodeBlock> basic_blocks;
388 NodeBlock global_code; 397 NodeBlock global_code;
389 ASTManager program_manager; 398 ASTManager program_manager{true, true};
390 CompilerSettings settings{};
391 399
392 std::set<u32> used_registers; 400 std::set<u32> used_registers;
393 std::set<Tegra::Shader::Pred> used_predicates; 401 std::set<Tegra::Shader::Pred> used_predicates;