summaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2020-03-13 16:26:24 -0400
committerGravatar GitHub2020-03-13 16:26:24 -0400
commit666d431ad8ee4e36f1b7f48d13f3fa63ba3675f2 (patch)
treed0f968d06b2bbc6e378a5a0632cd2d6322fe4e6d /src/video_core/shader
parentMerge pull request #3491 from ReinUsesLisp/polygon-modes (diff)
parentMerge branch 'master' into shader-purge (diff)
downloadyuzu-666d431ad8ee4e36f1b7f48d13f3fa63ba3675f2.tar.gz
yuzu-666d431ad8ee4e36f1b7f48d13f3fa63ba3675f2.tar.xz
yuzu-666d431ad8ee4e36f1b7f48d13f3fa63ba3675f2.zip
Merge pull request #3473 from ReinUsesLisp/shader-purge
gl_shader_cache: Rework shader cache and store texture arrays
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp126
-rw-r--r--src/video_core/shader/const_buffer_locker.h103
-rw-r--r--src/video_core/shader/control_flow.cpp13
-rw-r--r--src/video_core/shader/control_flow.h3
-rw-r--r--src/video_core/shader/decode.cpp22
-rw-r--r--src/video_core/shader/decode/texture.cpp5
-rw-r--r--src/video_core/shader/registry.cpp161
-rw-r--r--src/video_core/shader/registry.h137
-rw-r--r--src/video_core/shader/shader_ir.cpp5
-rw-r--r--src/video_core/shader/shader_ir.h6
-rw-r--r--src/video_core/shader/track.cpp18
11 files changed, 329 insertions, 270 deletions
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
deleted file mode 100644
index 0638be8cb..000000000
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7
8#include "common/common_types.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/engines/shader_type.h"
11#include "video_core/shader/const_buffer_locker.h"
12
13namespace VideoCommon::Shader {
14
15using Tegra::Engines::SamplerDescriptor;
16
17ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
18 : stage{shader_stage} {}
19
20ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
21 Tegra::Engines::ConstBufferEngineInterface& engine)
22 : stage{shader_stage}, engine{&engine} {}
23
24ConstBufferLocker::~ConstBufferLocker() = default;
25
26std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
27 const std::pair<u32, u32> key = {buffer, offset};
28 const auto iter = keys.find(key);
29 if (iter != keys.end()) {
30 return iter->second;
31 }
32 if (!engine) {
33 return std::nullopt;
34 }
35 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
36 keys.emplace(key, value);
37 return value;
38}
39
40std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
41 const u32 key = offset;
42 const auto iter = bound_samplers.find(key);
43 if (iter != bound_samplers.end()) {
44 return iter->second;
45 }
46 if (!engine) {
47 return std::nullopt;
48 }
49 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
50 bound_samplers.emplace(key, value);
51 return value;
52}
53
54std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
55 u32 buffer, u32 offset) {
56 const std::pair key = {buffer, offset};
57 const auto iter = bindless_samplers.find(key);
58 if (iter != bindless_samplers.end()) {
59 return iter->second;
60 }
61 if (!engine) {
62 return std::nullopt;
63 }
64 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
65 bindless_samplers.emplace(key, value);
66 return value;
67}
68
69std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
70 if (bound_buffer_saved) {
71 return bound_buffer;
72 }
73 if (!engine) {
74 return std::nullopt;
75 }
76 bound_buffer_saved = true;
77 bound_buffer = engine->GetBoundBuffer();
78 return bound_buffer;
79}
80
81void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
82 keys.insert_or_assign({buffer, offset}, value);
83}
84
85void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
86 bound_samplers.insert_or_assign(offset, sampler);
87}
88
89void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
90 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
91}
92
93void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
94 bound_buffer_saved = true;
95 bound_buffer = buffer;
96}
97
98bool ConstBufferLocker::IsConsistent() const {
99 if (!engine) {
100 return false;
101 }
102 return std::all_of(keys.begin(), keys.end(),
103 [this](const auto& pair) {
104 const auto [cbuf, offset] = pair.first;
105 const auto value = pair.second;
106 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
107 }) &&
108 std::all_of(bound_samplers.begin(), bound_samplers.end(),
109 [this](const auto& sampler) {
110 const auto [key, value] = sampler;
111 return value == engine->AccessBoundSampler(stage, key);
112 }) &&
113 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
114 [this](const auto& sampler) {
115 const auto [cbuf, offset] = sampler.first;
116 const auto value = sampler.second;
117 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
118 });
119}
120
121bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
122 return std::tie(keys, bound_samplers, bindless_samplers) ==
123 std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
124}
125
126} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
deleted file mode 100644
index d3ea11087..000000000
--- a/src/video_core/shader/const_buffer_locker.h
+++ /dev/null
@@ -1,103 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <unordered_map>
9#include "common/common_types.h"
10#include "common/hash.h"
11#include "video_core/engines/const_buffer_engine_interface.h"
12#include "video_core/engines/shader_type.h"
13#include "video_core/guest_driver.h"
14
15namespace VideoCommon::Shader {
16
17using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
18using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
19using BindlessSamplerMap =
20 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
21
22/**
23 * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader
24 * compiler. with it, the shader can obtain required data from GPU state and store it for disk
25 * shader compilation.
26 */
27class ConstBufferLocker {
28public:
29 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
30
31 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
32 Tegra::Engines::ConstBufferEngineInterface& engine);
33
34 ~ConstBufferLocker();
35
36 /// Retrieves a key from the locker, if it's registered, it will give the registered value, if
37 /// not it will obtain it from maxwell3d and register it.
38 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
39
40 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
41
42 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
43
44 std::optional<u32> ObtainBoundBuffer();
45
46 /// Inserts a key.
47 void InsertKey(u32 buffer, u32 offset, u32 value);
48
49 /// Inserts a bound sampler key.
50 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
51
52 /// Inserts a bindless sampler key.
53 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
54
55 /// Set the bound buffer for this locker.
56 void SetBoundBuffer(u32 buffer);
57
58 /// Checks keys and samplers against engine's current const buffers. Returns true if they are
59 /// the same value, false otherwise;
60 bool IsConsistent() const;
61
62 /// Returns true if the keys are equal to the other ones in the locker.
63 bool HasEqualKeys(const ConstBufferLocker& rhs) const;
64
65 /// Gives an getter to the const buffer keys in the database.
66 const KeyMap& GetKeys() const {
67 return keys;
68 }
69
70 /// Gets samplers database.
71 const BoundSamplerMap& GetBoundSamplers() const {
72 return bound_samplers;
73 }
74
75 /// Gets bindless samplers database.
76 const BindlessSamplerMap& GetBindlessSamplers() const {
77 return bindless_samplers;
78 }
79
80 /// Gets bound buffer used on this shader
81 u32 GetBoundBuffer() const {
82 return bound_buffer;
83 }
84
85 /// Obtains access to the guest driver's profile.
86 VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
87 if (engine) {
88 return &engine->AccessGuestDriverProfile();
89 }
90 return nullptr;
91 }
92
93private:
94 const Tegra::Engines::ShaderType stage;
95 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
96 KeyMap keys;
97 BoundSamplerMap bound_samplers;
98 BindlessSamplerMap bindless_samplers;
99 bool bound_buffer_saved{};
100 u32 bound_buffer{};
101};
102
103} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 0229733b6..2e2711350 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -13,6 +13,7 @@
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/shader/ast.h" 14#include "video_core/shader/ast.h"
15#include "video_core/shader/control_flow.h" 15#include "video_core/shader/control_flow.h"
16#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h" 17#include "video_core/shader/shader_ir.h"
17 18
18namespace VideoCommon::Shader { 19namespace VideoCommon::Shader {
@@ -64,11 +65,11 @@ struct BlockInfo {
64}; 65};
65 66
66struct CFGRebuildState { 67struct CFGRebuildState {
67 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) 68 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry)
68 : program_code{program_code}, locker{locker}, start{start} {} 69 : program_code{program_code}, registry{registry}, start{start} {}
69 70
70 const ProgramCode& program_code; 71 const ProgramCode& program_code;
71 ConstBufferLocker& locker; 72 Registry& registry;
72 u32 start{}; 73 u32 start{};
73 std::vector<BlockInfo> block_info; 74 std::vector<BlockInfo> block_info;
74 std::list<u32> inspect_queries; 75 std::list<u32> inspect_queries;
@@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
438 const s32 pc_target = offset + result.relative_position; 439 const s32 pc_target = offset + result.relative_position;
439 std::vector<CaseBranch> branches; 440 std::vector<CaseBranch> branches;
440 for (u32 i = 0; i < result.entries; i++) { 441 for (u32 i = 0; i < result.entries; i++) {
441 auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); 442 auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
442 if (!key) { 443 if (!key) {
443 return {ParseResult::AbnormalFlow, parse_info}; 444 return {ParseResult::AbnormalFlow, parse_info};
444 } 445 }
@@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) {
656 657
657std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, 658std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
658 const CompilerSettings& settings, 659 const CompilerSettings& settings,
659 ConstBufferLocker& locker) { 660 Registry& registry) {
660 auto result_out = std::make_unique<ShaderCharacteristics>(); 661 auto result_out = std::make_unique<ShaderCharacteristics>();
661 if (settings.depth == CompileDepth::BruteForce) { 662 if (settings.depth == CompileDepth::BruteForce) {
662 result_out->settings.depth = CompileDepth::BruteForce; 663 result_out->settings.depth = CompileDepth::BruteForce;
663 return result_out; 664 return result_out;
664 } 665 }
665 666
666 CFGRebuildState state{program_code, start_address, locker}; 667 CFGRebuildState state{program_code, start_address, registry};
667 // Inspect Code and generate blocks 668 // Inspect Code and generate blocks
668 state.labels.clear(); 669 state.labels.clear();
669 state.labels.emplace(start_address); 670 state.labels.emplace(start_address);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 5304998b9..62a3510d8 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -12,6 +12,7 @@
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/ast.h" 13#include "video_core/shader/ast.h"
14#include "video_core/shader/compiler_settings.h" 14#include "video_core/shader/compiler_settings.h"
15#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
17namespace VideoCommon::Shader { 18namespace VideoCommon::Shader {
@@ -111,6 +112,6 @@ struct ShaderCharacteristics {
111 112
112std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, 113std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
113 const CompilerSettings& settings, 114 const CompilerSettings& settings,
114 ConstBufferLocker& locker); 115 Registry& registry);
115 116
116} // namespace VideoCommon::Shader 117} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 6b697ed5d..87ac9ac6c 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
34 return (absolute_offset % SchedPeriod) == 0; 34 return (absolute_offset % SchedPeriod) == 0;
35} 35}
36 36
37void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, 37void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
38 const std::list<Sampler>& used_samplers) { 38 const std::list<Sampler>& used_samplers) {
39 if (gpu_driver == nullptr) { 39 if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
40 LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
41 return;
42 }
43 if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
44 return; 40 return;
45 } 41 }
46 u32 count{}; 42 u32 count{};
@@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
53 bound_offsets.emplace_back(sampler.GetOffset()); 49 bound_offsets.emplace_back(sampler.GetOffset());
54 } 50 }
55 if (count > 1) { 51 if (count > 1) {
56 gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); 52 gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
57 } 53 }
58} 54}
59 55
60std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, 56std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
61 VideoCore::GuestDriverProfile* gpu_driver, 57 VideoCore::GuestDriverProfile& gpu_driver,
62 const std::list<Sampler>& used_samplers) { 58 const std::list<Sampler>& used_samplers) {
63 if (gpu_driver == nullptr) {
64 LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
65 return std::nullopt;
66 }
67 const u32 base_offset = sampler_to_deduce.GetOffset(); 59 const u32 base_offset = sampler_to_deduce.GetOffset();
68 u32 max_offset{std::numeric_limits<u32>::max()}; 60 u32 max_offset{std::numeric_limits<u32>::max()};
69 for (const auto& sampler : used_samplers) { 61 for (const auto& sampler : used_samplers) {
@@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
77 if (max_offset == std::numeric_limits<u32>::max()) { 69 if (max_offset == std::numeric_limits<u32>::max()) {
78 return std::nullopt; 70 return std::nullopt;
79 } 71 }
80 return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); 72 return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
81} 73}
82 74
83} // Anonymous namespace 75} // Anonymous namespace
@@ -149,7 +141,7 @@ void ShaderIR::Decode() {
149 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 141 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
150 142
151 decompiled = false; 143 decompiled = false;
152 auto info = ScanFlow(program_code, main_offset, settings, locker); 144 auto info = ScanFlow(program_code, main_offset, settings, registry);
153 auto& shader_info = *info; 145 auto& shader_info = *info;
154 coverage_begin = shader_info.start; 146 coverage_begin = shader_info.start;
155 coverage_end = shader_info.end; 147 coverage_end = shader_info.end;
@@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
364 356
365void ShaderIR::PostDecode() { 357void ShaderIR::PostDecode() {
366 // Deduce texture handler size if needed 358 // Deduce texture handler size if needed
367 auto gpu_driver = locker.AccessGuestDriverProfile(); 359 auto gpu_driver = registry.AccessGuestDriverProfile();
368 DeduceTextureHandlerSize(gpu_driver, used_samplers); 360 DeduceTextureHandlerSize(gpu_driver, used_samplers);
369 // Deduce Indexed Samplers 361 // Deduce Indexed Samplers
370 if (!uses_indexed_samplers) { 362 if (!uses_indexed_samplers) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index bee7d8cad..48350e042 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -12,6 +12,7 @@
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h" 13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h" 14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
17namespace VideoCommon::Shader { 18namespace VideoCommon::Shader {
@@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
359 if (sampler_info) { 360 if (sampler_info) {
360 return *sampler_info; 361 return *sampler_info;
361 } 362 }
362 const auto sampler = 363 const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
363 buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); 364 : registry.ObtainBoundSampler(offset);
364 if (!sampler) { 365 if (!sampler) {
365 LOG_WARNING(HW_GPU, "Unknown sampler info"); 366 LOG_WARNING(HW_GPU, "Unknown sampler info");
366 return SamplerInfo{TextureType::Texture2D, false, false, false}; 367 return SamplerInfo{TextureType::Texture2D, false, false, false};
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
new file mode 100644
index 000000000..af70b3f35
--- /dev/null
+++ b/src/video_core/shader/registry.cpp
@@ -0,0 +1,161 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7
8#include "common/assert.h"
9#include "common/common_types.h"
10#include "video_core/engines/kepler_compute.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
13#include "video_core/shader/registry.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Engines::ConstBufferEngineInterface;
18using Tegra::Engines::SamplerDescriptor;
19using Tegra::Engines::ShaderType;
20
21namespace {
22
23GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
24 if (shader_stage == ShaderType::Compute) {
25 return {};
26 }
27 auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
28
29 GraphicsInfo info;
30 info.tfb_layouts = graphics.regs.tfb_layouts;
31 info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
32 info.primitive_topology = graphics.regs.draw.topology;
33 info.tessellation_primitive = graphics.regs.tess_mode.prim;
34 info.tessellation_spacing = graphics.regs.tess_mode.spacing;
35 info.tfb_enabled = graphics.regs.tfb_enabled;
36 info.tessellation_clockwise = graphics.regs.tess_mode.cw;
37 return info;
38}
39
40ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
41 if (shader_stage != ShaderType::Compute) {
42 return {};
43 }
44 auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
45 const auto& launch = compute.launch_description;
46
47 ComputeInfo info;
48 info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
49 info.local_memory_size_in_words = launch.local_pos_alloc;
50 info.shared_memory_size_in_words = launch.shared_alloc;
51 return info;
52}
53
54} // Anonymous namespace
55
56Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
57 : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
58 bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
59
60Registry::Registry(Tegra::Engines::ShaderType shader_stage,
61 Tegra::Engines::ConstBufferEngineInterface& engine)
62 : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
63 graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
64 shader_stage, engine)} {}
65
66Registry::~Registry() = default;
67
68std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
69 const std::pair<u32, u32> key = {buffer, offset};
70 const auto iter = keys.find(key);
71 if (iter != keys.end()) {
72 return iter->second;
73 }
74 if (!engine) {
75 return std::nullopt;
76 }
77 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
78 keys.emplace(key, value);
79 return value;
80}
81
82std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
83 const u32 key = offset;
84 const auto iter = bound_samplers.find(key);
85 if (iter != bound_samplers.end()) {
86 return iter->second;
87 }
88 if (!engine) {
89 return std::nullopt;
90 }
91 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
92 bound_samplers.emplace(key, value);
93 return value;
94}
95
96std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
97 u32 offset) {
98 const std::pair key = {buffer, offset};
99 const auto iter = bindless_samplers.find(key);
100 if (iter != bindless_samplers.end()) {
101 return iter->second;
102 }
103 if (!engine) {
104 return std::nullopt;
105 }
106 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
107 bindless_samplers.emplace(key, value);
108 return value;
109}
110
111void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
112 keys.insert_or_assign({buffer, offset}, value);
113}
114
115void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
116 bound_samplers.insert_or_assign(offset, sampler);
117}
118
119void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
120 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
121}
122
123bool Registry::IsConsistent() const {
124 if (!engine) {
125 return true;
126 }
127 return std::all_of(keys.begin(), keys.end(),
128 [this](const auto& pair) {
129 const auto [cbuf, offset] = pair.first;
130 const auto value = pair.second;
131 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
132 }) &&
133 std::all_of(bound_samplers.begin(), bound_samplers.end(),
134 [this](const auto& sampler) {
135 const auto [key, value] = sampler;
136 return value == engine->AccessBoundSampler(stage, key);
137 }) &&
138 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
139 [this](const auto& sampler) {
140 const auto [cbuf, offset] = sampler.first;
141 const auto value = sampler.second;
142 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
143 });
144}
145
146bool Registry::HasEqualKeys(const Registry& rhs) const {
147 return std::tie(keys, bound_samplers, bindless_samplers) ==
148 std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
149}
150
151const GraphicsInfo& Registry::GetGraphicsInfo() const {
152 ASSERT(stage != Tegra::Engines::ShaderType::Compute);
153 return graphics_info;
154}
155
156const ComputeInfo& Registry::GetComputeInfo() const {
157 ASSERT(stage == Tegra::Engines::ShaderType::Compute);
158 return compute_info;
159}
160
161} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
new file mode 100644
index 000000000..0c80d35fd
--- /dev/null
+++ b/src/video_core/shader/registry.h
@@ -0,0 +1,137 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <type_traits>
10#include <unordered_map>
11#include <utility>
12
13#include "common/common_types.h"
14#include "common/hash.h"
15#include "video_core/engines/const_buffer_engine_interface.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h"
18#include "video_core/guest_driver.h"
19
20namespace VideoCommon::Shader {
21
22using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
23using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
24using BindlessSamplerMap =
25 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
26
27struct GraphicsInfo {
28 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
29
30 std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
31 tfb_layouts{};
32 std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
33 Maxwell::PrimitiveTopology primitive_topology{};
34 Maxwell::TessellationPrimitive tessellation_primitive{};
35 Maxwell::TessellationSpacing tessellation_spacing{};
36 bool tfb_enabled = false;
37 bool tessellation_clockwise = false;
38};
39static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
40 std::is_standard_layout_v<GraphicsInfo>);
41
42struct ComputeInfo {
43 std::array<u32, 3> workgroup_size{};
44 u32 shared_memory_size_in_words = 0;
45 u32 local_memory_size_in_words = 0;
46};
47static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
48
49struct SerializedRegistryInfo {
50 VideoCore::GuestDriverProfile guest_driver_profile;
51 u32 bound_buffer = 0;
52 GraphicsInfo graphics;
53 ComputeInfo compute;
54};
55
56/**
57 * The Registry is a class use to interface the 3D and compute engines with the shader compiler.
58 * With it, the shader can obtain required data from GPU state and store it for disk shader
59 * compilation.
60 */
61class Registry {
62public:
63 explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
64
65 explicit Registry(Tegra::Engines::ShaderType shader_stage,
66 Tegra::Engines::ConstBufferEngineInterface& engine);
67
68 ~Registry();
69
70 /// Retrieves a key from the registry, if it's registered, it will give the registered value, if
71 /// not it will obtain it from maxwell3d and register it.
72 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
73
74 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
75
76 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
77
78 /// Inserts a key.
79 void InsertKey(u32 buffer, u32 offset, u32 value);
80
81 /// Inserts a bound sampler key.
82 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
83
84 /// Inserts a bindless sampler key.
85 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
86
87 /// Checks keys and samplers against engine's current const buffers.
88 /// Returns true if they are the same value, false otherwise.
89 bool IsConsistent() const;
90
91 /// Returns true if the keys are equal to the other ones in the registry.
92 bool HasEqualKeys(const Registry& rhs) const;
93
94 /// Returns graphics information from this shader
95 const GraphicsInfo& GetGraphicsInfo() const;
96
97 /// Returns compute information from this shader
98 const ComputeInfo& GetComputeInfo() const;
99
100 /// Gives an getter to the const buffer keys in the database.
101 const KeyMap& GetKeys() const {
102 return keys;
103 }
104
105 /// Gets samplers database.
106 const BoundSamplerMap& GetBoundSamplers() const {
107 return bound_samplers;
108 }
109
110 /// Gets bindless samplers database.
111 const BindlessSamplerMap& GetBindlessSamplers() const {
112 return bindless_samplers;
113 }
114
115 /// Gets bound buffer used on this shader
116 u32 GetBoundBuffer() const {
117 return bound_buffer;
118 }
119
120 /// Obtains access to the guest driver's profile.
121 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
122 return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
123 }
124
125private:
126 const Tegra::Engines::ShaderType stage;
127 VideoCore::GuestDriverProfile stored_guest_driver_profile;
128 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
129 KeyMap keys;
130 BoundSamplerMap bound_samplers;
131 BindlessSamplerMap bindless_samplers;
132 u32 bound_buffer;
133 GraphicsInfo graphics_info;
134 ComputeInfo compute_info;
135};
136
137} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 3a5d280a9..425927777 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -11,6 +11,7 @@
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/node_helper.h" 13#include "video_core/shader/node_helper.h"
14#include "video_core/shader/registry.h"
14#include "video_core/shader/shader_ir.h" 15#include "video_core/shader/shader_ir.h"
15 16
16namespace VideoCommon::Shader { 17namespace VideoCommon::Shader {
@@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation;
24using Tegra::Shader::Register; 25using Tegra::Shader::Register;
25 26
26ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, 27ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
27 ConstBufferLocker& locker) 28 Registry& registry)
28 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { 29 : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
29 Decode(); 30 Decode();
30 PostDecode(); 31 PostDecode();
31} 32}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index b0851c3be..dde036b40 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -18,8 +18,8 @@
18#include "video_core/engines/shader_header.h" 18#include "video_core/engines/shader_header.h"
19#include "video_core/shader/ast.h" 19#include "video_core/shader/ast.h"
20#include "video_core/shader/compiler_settings.h" 20#include "video_core/shader/compiler_settings.h"
21#include "video_core/shader/const_buffer_locker.h"
22#include "video_core/shader/node.h" 21#include "video_core/shader/node.h"
22#include "video_core/shader/registry.h"
23 23
24namespace VideoCommon::Shader { 24namespace VideoCommon::Shader {
25 25
@@ -69,7 +69,7 @@ struct GlobalMemoryUsage {
69class ShaderIR final { 69class ShaderIR final {
70public: 70public:
71 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, 71 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
72 ConstBufferLocker& locker); 72 Registry& registry);
73 ~ShaderIR(); 73 ~ShaderIR();
74 74
75 const std::map<u32, NodeBlock>& GetBasicBlocks() const { 75 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -414,7 +414,7 @@ private:
414 const ProgramCode& program_code; 414 const ProgramCode& program_code;
415 const u32 main_offset; 415 const u32 main_offset;
416 const CompilerSettings settings; 416 const CompilerSettings settings;
417 ConstBufferLocker& locker; 417 Registry& registry;
418 418
419 bool decompiled{}; 419 bool decompiled{};
420 bool disable_flow_stack{}; 420 bool disable_flow_stack{};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 15e22b9fa..10739b37d 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
81 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); 81 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
82 return {tracked, track}; 82 return {tracked, track};
83 } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { 83 } else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
84 auto bound_buffer = locker.ObtainBoundBuffer(); 84 const u32 bound_buffer = registry.GetBoundBuffer();
85 if (!bound_buffer) { 85 if (bound_buffer != cbuf->GetIndex()) {
86 return {}; 86 return {};
87 } 87 }
88 if (*bound_buffer != cbuf->GetIndex()) { 88 const auto pair = DecoupleIndirectRead(*operation);
89 return {};
90 }
91 auto pair = DecoupleIndirectRead(*operation);
92 if (!pair) { 89 if (!pair) {
93 return {}; 90 return {};
94 } 91 }
95 auto [gpr, base_offset] = *pair; 92 auto [gpr, base_offset] = *pair;
96 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); 93 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
97 auto gpu_driver = locker.AccessGuestDriverProfile(); 94 const auto& gpu_driver = registry.AccessGuestDriverProfile();
98 if (gpu_driver == nullptr) {
99 return {};
100 }
101 const u32 bindless_cv = NewCustomVariable(); 95 const u32 bindless_cv = NewCustomVariable();
102 const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, 96 const Node op =
103 Immediate(gpu_driver->GetTextureHandlerSize())); 97 Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
104 98
105 const Node cv_node = GetCustomVariable(bindless_cv); 99 const Node cv_node = GetCustomVariable(bindless_cv);
106 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); 100 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));