diff options
| author | 2020-03-13 16:26:24 -0400 | |
|---|---|---|
| committer | 2020-03-13 16:26:24 -0400 | |
| commit | 666d431ad8ee4e36f1b7f48d13f3fa63ba3675f2 (patch) | |
| tree | d0f968d06b2bbc6e378a5a0632cd2d6322fe4e6d /src/video_core/shader | |
| parent | Merge pull request #3491 from ReinUsesLisp/polygon-modes (diff) | |
| parent | Merge branch 'master' into shader-purge (diff) | |
| download | yuzu-666d431ad8ee4e36f1b7f48d13f3fa63ba3675f2.tar.gz yuzu-666d431ad8ee4e36f1b7f48d13f3fa63ba3675f2.tar.xz yuzu-666d431ad8ee4e36f1b7f48d13f3fa63ba3675f2.zip | |
Merge pull request #3473 from ReinUsesLisp/shader-purge
gl_shader_cache: Rework shader cache and store texture arrays
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.cpp | 126 | ||||
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.h | 103 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader/decode.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/shader/registry.cpp | 161 | ||||
| -rw-r--r-- | src/video_core/shader/registry.h | 137 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 6 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 18 |
11 files changed, 329 insertions, 270 deletions
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp deleted file mode 100644 index 0638be8cb..000000000 --- a/src/video_core/shader/const_buffer_locker.cpp +++ /dev/null | |||
| @@ -1,126 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/engines/shader_type.h" | ||
| 11 | #include "video_core/shader/const_buffer_locker.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using Tegra::Engines::SamplerDescriptor; | ||
| 16 | |||
| 17 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) | ||
| 18 | : stage{shader_stage} {} | ||
| 19 | |||
| 20 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||
| 21 | Tegra::Engines::ConstBufferEngineInterface& engine) | ||
| 22 | : stage{shader_stage}, engine{&engine} {} | ||
| 23 | |||
| 24 | ConstBufferLocker::~ConstBufferLocker() = default; | ||
| 25 | |||
| 26 | std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { | ||
| 27 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 28 | const auto iter = keys.find(key); | ||
| 29 | if (iter != keys.end()) { | ||
| 30 | return iter->second; | ||
| 31 | } | ||
| 32 | if (!engine) { | ||
| 33 | return std::nullopt; | ||
| 34 | } | ||
| 35 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 36 | keys.emplace(key, value); | ||
| 37 | return value; | ||
| 38 | } | ||
| 39 | |||
| 40 | std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) { | ||
| 41 | const u32 key = offset; | ||
| 42 | const auto iter = bound_samplers.find(key); | ||
| 43 | if (iter != bound_samplers.end()) { | ||
| 44 | return iter->second; | ||
| 45 | } | ||
| 46 | if (!engine) { | ||
| 47 | return std::nullopt; | ||
| 48 | } | ||
| 49 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 50 | bound_samplers.emplace(key, value); | ||
| 51 | return value; | ||
| 52 | } | ||
| 53 | |||
| 54 | std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler( | ||
| 55 | u32 buffer, u32 offset) { | ||
| 56 | const std::pair key = {buffer, offset}; | ||
| 57 | const auto iter = bindless_samplers.find(key); | ||
| 58 | if (iter != bindless_samplers.end()) { | ||
| 59 | return iter->second; | ||
| 60 | } | ||
| 61 | if (!engine) { | ||
| 62 | return std::nullopt; | ||
| 63 | } | ||
| 64 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 65 | bindless_samplers.emplace(key, value); | ||
| 66 | return value; | ||
| 67 | } | ||
| 68 | |||
| 69 | std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { | ||
| 70 | if (bound_buffer_saved) { | ||
| 71 | return bound_buffer; | ||
| 72 | } | ||
| 73 | if (!engine) { | ||
| 74 | return std::nullopt; | ||
| 75 | } | ||
| 76 | bound_buffer_saved = true; | ||
| 77 | bound_buffer = engine->GetBoundBuffer(); | ||
| 78 | return bound_buffer; | ||
| 79 | } | ||
| 80 | |||
| 81 | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { | ||
| 82 | keys.insert_or_assign({buffer, offset}, value); | ||
| 83 | } | ||
| 84 | |||
| 85 | void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||
| 86 | bound_samplers.insert_or_assign(offset, sampler); | ||
| 87 | } | ||
| 88 | |||
| 89 | void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||
| 90 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||
| 91 | } | ||
| 92 | |||
| 93 | void ConstBufferLocker::SetBoundBuffer(u32 buffer) { | ||
| 94 | bound_buffer_saved = true; | ||
| 95 | bound_buffer = buffer; | ||
| 96 | } | ||
| 97 | |||
| 98 | bool ConstBufferLocker::IsConsistent() const { | ||
| 99 | if (!engine) { | ||
| 100 | return false; | ||
| 101 | } | ||
| 102 | return std::all_of(keys.begin(), keys.end(), | ||
| 103 | [this](const auto& pair) { | ||
| 104 | const auto [cbuf, offset] = pair.first; | ||
| 105 | const auto value = pair.second; | ||
| 106 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 107 | }) && | ||
| 108 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 109 | [this](const auto& sampler) { | ||
| 110 | const auto [key, value] = sampler; | ||
| 111 | return value == engine->AccessBoundSampler(stage, key); | ||
| 112 | }) && | ||
| 113 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 114 | [this](const auto& sampler) { | ||
| 115 | const auto [cbuf, offset] = sampler.first; | ||
| 116 | const auto value = sampler.second; | ||
| 117 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 118 | }); | ||
| 119 | } | ||
| 120 | |||
| 121 | bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { | ||
| 122 | return std::tie(keys, bound_samplers, bindless_samplers) == | ||
| 123 | std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); | ||
| 124 | } | ||
| 125 | |||
| 126 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h deleted file mode 100644 index d3ea11087..000000000 --- a/src/video_core/shader/const_buffer_locker.h +++ /dev/null | |||
| @@ -1,103 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/hash.h" | ||
| 11 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/guest_driver.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 18 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 19 | using BindlessSamplerMap = | ||
| 20 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 21 | |||
| 22 | /** | ||
| 23 | * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader | ||
| 24 | * compiler. with it, the shader can obtain required data from GPU state and store it for disk | ||
| 25 | * shader compilation. | ||
| 26 | */ | ||
| 27 | class ConstBufferLocker { | ||
| 28 | public: | ||
| 29 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); | ||
| 30 | |||
| 31 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||
| 32 | Tegra::Engines::ConstBufferEngineInterface& engine); | ||
| 33 | |||
| 34 | ~ConstBufferLocker(); | ||
| 35 | |||
| 36 | /// Retrieves a key from the locker, if it's registered, it will give the registered value, if | ||
| 37 | /// not it will obtain it from maxwell3d and register it. | ||
| 38 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 39 | |||
| 40 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 41 | |||
| 42 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 43 | |||
| 44 | std::optional<u32> ObtainBoundBuffer(); | ||
| 45 | |||
| 46 | /// Inserts a key. | ||
| 47 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 48 | |||
| 49 | /// Inserts a bound sampler key. | ||
| 50 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 51 | |||
| 52 | /// Inserts a bindless sampler key. | ||
| 53 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 54 | |||
| 55 | /// Set the bound buffer for this locker. | ||
| 56 | void SetBoundBuffer(u32 buffer); | ||
| 57 | |||
| 58 | /// Checks keys and samplers against engine's current const buffers. Returns true if they are | ||
| 59 | /// the same value, false otherwise; | ||
| 60 | bool IsConsistent() const; | ||
| 61 | |||
| 62 | /// Returns true if the keys are equal to the other ones in the locker. | ||
| 63 | bool HasEqualKeys(const ConstBufferLocker& rhs) const; | ||
| 64 | |||
| 65 | /// Gives an getter to the const buffer keys in the database. | ||
| 66 | const KeyMap& GetKeys() const { | ||
| 67 | return keys; | ||
| 68 | } | ||
| 69 | |||
| 70 | /// Gets samplers database. | ||
| 71 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 72 | return bound_samplers; | ||
| 73 | } | ||
| 74 | |||
| 75 | /// Gets bindless samplers database. | ||
| 76 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 77 | return bindless_samplers; | ||
| 78 | } | ||
| 79 | |||
| 80 | /// Gets bound buffer used on this shader | ||
| 81 | u32 GetBoundBuffer() const { | ||
| 82 | return bound_buffer; | ||
| 83 | } | ||
| 84 | |||
| 85 | /// Obtains access to the guest driver's profile. | ||
| 86 | VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { | ||
| 87 | if (engine) { | ||
| 88 | return &engine->AccessGuestDriverProfile(); | ||
| 89 | } | ||
| 90 | return nullptr; | ||
| 91 | } | ||
| 92 | |||
| 93 | private: | ||
| 94 | const Tegra::Engines::ShaderType stage; | ||
| 95 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||
| 96 | KeyMap keys; | ||
| 97 | BoundSamplerMap bound_samplers; | ||
| 98 | BindlessSamplerMap bindless_samplers; | ||
| 99 | bool bound_buffer_saved{}; | ||
| 100 | u32 bound_buffer{}; | ||
| 101 | }; | ||
| 102 | |||
| 103 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 0229733b6..2e2711350 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "video_core/shader/ast.h" | 14 | #include "video_core/shader/ast.h" |
| 15 | #include "video_core/shader/control_flow.h" | 15 | #include "video_core/shader/control_flow.h" |
| 16 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | 17 | #include "video_core/shader/shader_ir.h" |
| 17 | 18 | ||
| 18 | namespace VideoCommon::Shader { | 19 | namespace VideoCommon::Shader { |
| @@ -64,11 +65,11 @@ struct BlockInfo { | |||
| 64 | }; | 65 | }; |
| 65 | 66 | ||
| 66 | struct CFGRebuildState { | 67 | struct CFGRebuildState { |
| 67 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) | 68 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry) |
| 68 | : program_code{program_code}, locker{locker}, start{start} {} | 69 | : program_code{program_code}, registry{registry}, start{start} {} |
| 69 | 70 | ||
| 70 | const ProgramCode& program_code; | 71 | const ProgramCode& program_code; |
| 71 | ConstBufferLocker& locker; | 72 | Registry& registry; |
| 72 | u32 start{}; | 73 | u32 start{}; |
| 73 | std::vector<BlockInfo> block_info; | 74 | std::vector<BlockInfo> block_info; |
| 74 | std::list<u32> inspect_queries; | 75 | std::list<u32> inspect_queries; |
| @@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||
| 438 | const s32 pc_target = offset + result.relative_position; | 439 | const s32 pc_target = offset + result.relative_position; |
| 439 | std::vector<CaseBranch> branches; | 440 | std::vector<CaseBranch> branches; |
| 440 | for (u32 i = 0; i < result.entries; i++) { | 441 | for (u32 i = 0; i < result.entries; i++) { |
| 441 | auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); | 442 | auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); |
| 442 | if (!key) { | 443 | if (!key) { |
| 443 | return {ParseResult::AbnormalFlow, parse_info}; | 444 | return {ParseResult::AbnormalFlow, parse_info}; |
| 444 | } | 445 | } |
| @@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) { | |||
| 656 | 657 | ||
| 657 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | 658 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 658 | const CompilerSettings& settings, | 659 | const CompilerSettings& settings, |
| 659 | ConstBufferLocker& locker) { | 660 | Registry& registry) { |
| 660 | auto result_out = std::make_unique<ShaderCharacteristics>(); | 661 | auto result_out = std::make_unique<ShaderCharacteristics>(); |
| 661 | if (settings.depth == CompileDepth::BruteForce) { | 662 | if (settings.depth == CompileDepth::BruteForce) { |
| 662 | result_out->settings.depth = CompileDepth::BruteForce; | 663 | result_out->settings.depth = CompileDepth::BruteForce; |
| 663 | return result_out; | 664 | return result_out; |
| 664 | } | 665 | } |
| 665 | 666 | ||
| 666 | CFGRebuildState state{program_code, start_address, locker}; | 667 | CFGRebuildState state{program_code, start_address, registry}; |
| 667 | // Inspect Code and generate blocks | 668 | // Inspect Code and generate blocks |
| 668 | state.labels.clear(); | 669 | state.labels.clear(); |
| 669 | state.labels.emplace(start_address); | 670 | state.labels.emplace(start_address); |
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5304998b9..62a3510d8 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/shader/ast.h" | 13 | #include "video_core/shader/ast.h" |
| 14 | #include "video_core/shader/compiler_settings.h" | 14 | #include "video_core/shader/compiler_settings.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| 17 | namespace VideoCommon::Shader { | 18 | namespace VideoCommon::Shader { |
| @@ -111,6 +112,6 @@ struct ShaderCharacteristics { | |||
| 111 | 112 | ||
| 112 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | 113 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 113 | const CompilerSettings& settings, | 114 | const CompilerSettings& settings, |
| 114 | ConstBufferLocker& locker); | 115 | Registry& registry); |
| 115 | 116 | ||
| 116 | } // namespace VideoCommon::Shader | 117 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 6b697ed5d..87ac9ac6c 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 34 | return (absolute_offset % SchedPeriod) == 0; | 34 | return (absolute_offset % SchedPeriod) == 0; |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, |
| 38 | const std::list<Sampler>& used_samplers) { | 38 | const std::list<Sampler>& used_samplers) { |
| 39 | if (gpu_driver == nullptr) { | 39 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { |
| 40 | LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); | ||
| 41 | return; | ||
| 42 | } | ||
| 43 | if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||
| 44 | return; | 40 | return; |
| 45 | } | 41 | } |
| 46 | u32 count{}; | 42 | u32 count{}; |
| @@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | |||
| 53 | bound_offsets.emplace_back(sampler.GetOffset()); | 49 | bound_offsets.emplace_back(sampler.GetOffset()); |
| 54 | } | 50 | } |
| 55 | if (count > 1) { | 51 | if (count > 1) { |
| 56 | gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); | 52 | gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); |
| 57 | } | 53 | } |
| 58 | } | 54 | } |
| 59 | 55 | ||
| 60 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | 56 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, |
| 61 | VideoCore::GuestDriverProfile* gpu_driver, | 57 | VideoCore::GuestDriverProfile& gpu_driver, |
| 62 | const std::list<Sampler>& used_samplers) { | 58 | const std::list<Sampler>& used_samplers) { |
| 63 | if (gpu_driver == nullptr) { | ||
| 64 | LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); | ||
| 65 | return std::nullopt; | ||
| 66 | } | ||
| 67 | const u32 base_offset = sampler_to_deduce.GetOffset(); | 59 | const u32 base_offset = sampler_to_deduce.GetOffset(); |
| 68 | u32 max_offset{std::numeric_limits<u32>::max()}; | 60 | u32 max_offset{std::numeric_limits<u32>::max()}; |
| 69 | for (const auto& sampler : used_samplers) { | 61 | for (const auto& sampler : used_samplers) { |
| @@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | |||
| 77 | if (max_offset == std::numeric_limits<u32>::max()) { | 69 | if (max_offset == std::numeric_limits<u32>::max()) { |
| 78 | return std::nullopt; | 70 | return std::nullopt; |
| 79 | } | 71 | } |
| 80 | return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); | 72 | return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); |
| 81 | } | 73 | } |
| 82 | 74 | ||
| 83 | } // Anonymous namespace | 75 | } // Anonymous namespace |
| @@ -149,7 +141,7 @@ void ShaderIR::Decode() { | |||
| 149 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 141 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 150 | 142 | ||
| 151 | decompiled = false; | 143 | decompiled = false; |
| 152 | auto info = ScanFlow(program_code, main_offset, settings, locker); | 144 | auto info = ScanFlow(program_code, main_offset, settings, registry); |
| 153 | auto& shader_info = *info; | 145 | auto& shader_info = *info; |
| 154 | coverage_begin = shader_info.start; | 146 | coverage_begin = shader_info.start; |
| 155 | coverage_end = shader_info.end; | 147 | coverage_end = shader_info.end; |
| @@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 364 | 356 | ||
| 365 | void ShaderIR::PostDecode() { | 357 | void ShaderIR::PostDecode() { |
| 366 | // Deduce texture handler size if needed | 358 | // Deduce texture handler size if needed |
| 367 | auto gpu_driver = locker.AccessGuestDriverProfile(); | 359 | auto gpu_driver = registry.AccessGuestDriverProfile(); |
| 368 | DeduceTextureHandlerSize(gpu_driver, used_samplers); | 360 | DeduceTextureHandlerSize(gpu_driver, used_samplers); |
| 369 | // Deduce Indexed Samplers | 361 | // Deduce Indexed Samplers |
| 370 | if (!uses_indexed_samplers) { | 362 | if (!uses_indexed_samplers) { |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index bee7d8cad..48350e042 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 13 | #include "video_core/engines/shader_bytecode.h" | 13 | #include "video_core/engines/shader_bytecode.h" |
| 14 | #include "video_core/shader/node_helper.h" | 14 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| 17 | namespace VideoCommon::Shader { | 18 | namespace VideoCommon::Shader { |
| @@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample | |||
| 359 | if (sampler_info) { | 360 | if (sampler_info) { |
| 360 | return *sampler_info; | 361 | return *sampler_info; |
| 361 | } | 362 | } |
| 362 | const auto sampler = | 363 | const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset) |
| 363 | buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); | 364 | : registry.ObtainBoundSampler(offset); |
| 364 | if (!sampler) { | 365 | if (!sampler) { |
| 365 | LOG_WARNING(HW_GPU, "Unknown sampler info"); | 366 | LOG_WARNING(HW_GPU, "Unknown sampler info"); |
| 366 | return SamplerInfo{TextureType::Texture2D, false, false, false}; | 367 | return SamplerInfo{TextureType::Texture2D, false, false, false}; |
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp new file mode 100644 index 000000000..af70b3f35 --- /dev/null +++ b/src/video_core/shader/registry.cpp | |||
| @@ -0,0 +1,161 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/engines/kepler_compute.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/shader/registry.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Engines::ConstBufferEngineInterface; | ||
| 18 | using Tegra::Engines::SamplerDescriptor; | ||
| 19 | using Tegra::Engines::ShaderType; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 24 | if (shader_stage == ShaderType::Compute) { | ||
| 25 | return {}; | ||
| 26 | } | ||
| 27 | auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine); | ||
| 28 | |||
| 29 | GraphicsInfo info; | ||
| 30 | info.tfb_layouts = graphics.regs.tfb_layouts; | ||
| 31 | info.tfb_varying_locs = graphics.regs.tfb_varying_locs; | ||
| 32 | info.primitive_topology = graphics.regs.draw.topology; | ||
| 33 | info.tessellation_primitive = graphics.regs.tess_mode.prim; | ||
| 34 | info.tessellation_spacing = graphics.regs.tess_mode.spacing; | ||
| 35 | info.tfb_enabled = graphics.regs.tfb_enabled; | ||
| 36 | info.tessellation_clockwise = graphics.regs.tess_mode.cw; | ||
| 37 | return info; | ||
| 38 | } | ||
| 39 | |||
| 40 | ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 41 | if (shader_stage != ShaderType::Compute) { | ||
| 42 | return {}; | ||
| 43 | } | ||
| 44 | auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine); | ||
| 45 | const auto& launch = compute.launch_description; | ||
| 46 | |||
| 47 | ComputeInfo info; | ||
| 48 | info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; | ||
| 49 | info.local_memory_size_in_words = launch.local_pos_alloc; | ||
| 50 | info.shared_memory_size_in_words = launch.shared_alloc; | ||
| 51 | return info; | ||
| 52 | } | ||
| 53 | |||
| 54 | } // Anonymous namespace | ||
| 55 | |||
| 56 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) | ||
| 57 | : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, | ||
| 58 | bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} | ||
| 59 | |||
| 60 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, | ||
| 61 | Tegra::Engines::ConstBufferEngineInterface& engine) | ||
| 62 | : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, | ||
| 63 | graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( | ||
| 64 | shader_stage, engine)} {} | ||
| 65 | |||
| 66 | Registry::~Registry() = default; | ||
| 67 | |||
| 68 | std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { | ||
| 69 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 70 | const auto iter = keys.find(key); | ||
| 71 | if (iter != keys.end()) { | ||
| 72 | return iter->second; | ||
| 73 | } | ||
| 74 | if (!engine) { | ||
| 75 | return std::nullopt; | ||
| 76 | } | ||
| 77 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 78 | keys.emplace(key, value); | ||
| 79 | return value; | ||
| 80 | } | ||
| 81 | |||
| 82 | std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { | ||
| 83 | const u32 key = offset; | ||
| 84 | const auto iter = bound_samplers.find(key); | ||
| 85 | if (iter != bound_samplers.end()) { | ||
| 86 | return iter->second; | ||
| 87 | } | ||
| 88 | if (!engine) { | ||
| 89 | return std::nullopt; | ||
| 90 | } | ||
| 91 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 92 | bound_samplers.emplace(key, value); | ||
| 93 | return value; | ||
| 94 | } | ||
| 95 | |||
| 96 | std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, | ||
| 97 | u32 offset) { | ||
| 98 | const std::pair key = {buffer, offset}; | ||
| 99 | const auto iter = bindless_samplers.find(key); | ||
| 100 | if (iter != bindless_samplers.end()) { | ||
| 101 | return iter->second; | ||
| 102 | } | ||
| 103 | if (!engine) { | ||
| 104 | return std::nullopt; | ||
| 105 | } | ||
| 106 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 107 | bindless_samplers.emplace(key, value); | ||
| 108 | return value; | ||
| 109 | } | ||
| 110 | |||
| 111 | void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { | ||
| 112 | keys.insert_or_assign({buffer, offset}, value); | ||
| 113 | } | ||
| 114 | |||
| 115 | void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||
| 116 | bound_samplers.insert_or_assign(offset, sampler); | ||
| 117 | } | ||
| 118 | |||
| 119 | void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||
| 120 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||
| 121 | } | ||
| 122 | |||
| 123 | bool Registry::IsConsistent() const { | ||
| 124 | if (!engine) { | ||
| 125 | return true; | ||
| 126 | } | ||
| 127 | return std::all_of(keys.begin(), keys.end(), | ||
| 128 | [this](const auto& pair) { | ||
| 129 | const auto [cbuf, offset] = pair.first; | ||
| 130 | const auto value = pair.second; | ||
| 131 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 132 | }) && | ||
| 133 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 134 | [this](const auto& sampler) { | ||
| 135 | const auto [key, value] = sampler; | ||
| 136 | return value == engine->AccessBoundSampler(stage, key); | ||
| 137 | }) && | ||
| 138 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 139 | [this](const auto& sampler) { | ||
| 140 | const auto [cbuf, offset] = sampler.first; | ||
| 141 | const auto value = sampler.second; | ||
| 142 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 143 | }); | ||
| 144 | } | ||
| 145 | |||
| 146 | bool Registry::HasEqualKeys(const Registry& rhs) const { | ||
| 147 | return std::tie(keys, bound_samplers, bindless_samplers) == | ||
| 148 | std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); | ||
| 149 | } | ||
| 150 | |||
| 151 | const GraphicsInfo& Registry::GetGraphicsInfo() const { | ||
| 152 | ASSERT(stage != Tegra::Engines::ShaderType::Compute); | ||
| 153 | return graphics_info; | ||
| 154 | } | ||
| 155 | |||
| 156 | const ComputeInfo& Registry::GetComputeInfo() const { | ||
| 157 | ASSERT(stage == Tegra::Engines::ShaderType::Compute); | ||
| 158 | return compute_info; | ||
| 159 | } | ||
| 160 | |||
| 161 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h new file mode 100644 index 000000000..0c80d35fd --- /dev/null +++ b/src/video_core/shader/registry.h | |||
| @@ -0,0 +1,137 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/hash.h" | ||
| 15 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/engines/shader_type.h" | ||
| 18 | #include "video_core/guest_driver.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 23 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 24 | using BindlessSamplerMap = | ||
| 25 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 26 | |||
| 27 | struct GraphicsInfo { | ||
| 28 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 29 | |||
| 30 | std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> | ||
| 31 | tfb_layouts{}; | ||
| 32 | std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; | ||
| 33 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 34 | Maxwell::TessellationPrimitive tessellation_primitive{}; | ||
| 35 | Maxwell::TessellationSpacing tessellation_spacing{}; | ||
| 36 | bool tfb_enabled = false; | ||
| 37 | bool tessellation_clockwise = false; | ||
| 38 | }; | ||
| 39 | static_assert(std::is_trivially_copyable_v<GraphicsInfo> && | ||
| 40 | std::is_standard_layout_v<GraphicsInfo>); | ||
| 41 | |||
| 42 | struct ComputeInfo { | ||
| 43 | std::array<u32, 3> workgroup_size{}; | ||
| 44 | u32 shared_memory_size_in_words = 0; | ||
| 45 | u32 local_memory_size_in_words = 0; | ||
| 46 | }; | ||
| 47 | static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); | ||
| 48 | |||
| 49 | struct SerializedRegistryInfo { | ||
| 50 | VideoCore::GuestDriverProfile guest_driver_profile; | ||
| 51 | u32 bound_buffer = 0; | ||
| 52 | GraphicsInfo graphics; | ||
| 53 | ComputeInfo compute; | ||
| 54 | }; | ||
| 55 | |||
| 56 | /** | ||
| 57 | * The Registry is a class use to interface the 3D and compute engines with the shader compiler. | ||
| 58 | * With it, the shader can obtain required data from GPU state and store it for disk shader | ||
| 59 | * compilation. | ||
| 60 | */ | ||
| 61 | class Registry { | ||
| 62 | public: | ||
| 63 | explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); | ||
| 64 | |||
| 65 | explicit Registry(Tegra::Engines::ShaderType shader_stage, | ||
| 66 | Tegra::Engines::ConstBufferEngineInterface& engine); | ||
| 67 | |||
| 68 | ~Registry(); | ||
| 69 | |||
| 70 | /// Retrieves a key from the registry, if it's registered, it will give the registered value, if | ||
| 71 | /// not it will obtain it from maxwell3d and register it. | ||
| 72 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 73 | |||
| 74 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 75 | |||
| 76 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 77 | |||
| 78 | /// Inserts a key. | ||
| 79 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 80 | |||
| 81 | /// Inserts a bound sampler key. | ||
| 82 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 83 | |||
| 84 | /// Inserts a bindless sampler key. | ||
| 85 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 86 | |||
| 87 | /// Checks keys and samplers against engine's current const buffers. | ||
| 88 | /// Returns true if they are the same value, false otherwise. | ||
| 89 | bool IsConsistent() const; | ||
| 90 | |||
| 91 | /// Returns true if the keys are equal to the other ones in the registry. | ||
| 92 | bool HasEqualKeys(const Registry& rhs) const; | ||
| 93 | |||
| 94 | /// Returns graphics information from this shader | ||
| 95 | const GraphicsInfo& GetGraphicsInfo() const; | ||
| 96 | |||
| 97 | /// Returns compute information from this shader | ||
| 98 | const ComputeInfo& GetComputeInfo() const; | ||
| 99 | |||
| 100 | /// Gives an getter to the const buffer keys in the database. | ||
| 101 | const KeyMap& GetKeys() const { | ||
| 102 | return keys; | ||
| 103 | } | ||
| 104 | |||
| 105 | /// Gets samplers database. | ||
| 106 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 107 | return bound_samplers; | ||
| 108 | } | ||
| 109 | |||
| 110 | /// Gets bindless samplers database. | ||
| 111 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 112 | return bindless_samplers; | ||
| 113 | } | ||
| 114 | |||
| 115 | /// Gets bound buffer used on this shader | ||
| 116 | u32 GetBoundBuffer() const { | ||
| 117 | return bound_buffer; | ||
| 118 | } | ||
| 119 | |||
| 120 | /// Obtains access to the guest driver's profile. | ||
| 121 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { | ||
| 122 | return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; | ||
| 123 | } | ||
| 124 | |||
| 125 | private: | ||
| 126 | const Tegra::Engines::ShaderType stage; | ||
| 127 | VideoCore::GuestDriverProfile stored_guest_driver_profile; | ||
| 128 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||
| 129 | KeyMap keys; | ||
| 130 | BoundSamplerMap bound_samplers; | ||
| 131 | BindlessSamplerMap bindless_samplers; | ||
| 132 | u32 bound_buffer; | ||
| 133 | GraphicsInfo graphics_info; | ||
| 134 | ComputeInfo compute_info; | ||
| 135 | }; | ||
| 136 | |||
| 137 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 3a5d280a9..425927777 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/shader/node_helper.h" | 13 | #include "video_core/shader/node_helper.h" |
| 14 | #include "video_core/shader/registry.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | 15 | #include "video_core/shader/shader_ir.h" |
| 15 | 16 | ||
| 16 | namespace VideoCommon::Shader { | 17 | namespace VideoCommon::Shader { |
| @@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation; | |||
| 24 | using Tegra::Shader::Register; | 25 | using Tegra::Shader::Register; |
| 25 | 26 | ||
| 26 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, | 27 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 27 | ConstBufferLocker& locker) | 28 | Registry& registry) |
| 28 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { | 29 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} { |
| 29 | Decode(); | 30 | Decode(); |
| 30 | PostDecode(); | 31 | PostDecode(); |
| 31 | } | 32 | } |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index b0851c3be..dde036b40 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -18,8 +18,8 @@ | |||
| 18 | #include "video_core/engines/shader_header.h" | 18 | #include "video_core/engines/shader_header.h" |
| 19 | #include "video_core/shader/ast.h" | 19 | #include "video_core/shader/ast.h" |
| 20 | #include "video_core/shader/compiler_settings.h" | 20 | #include "video_core/shader/compiler_settings.h" |
| 21 | #include "video_core/shader/const_buffer_locker.h" | ||
| 22 | #include "video_core/shader/node.h" | 21 | #include "video_core/shader/node.h" |
| 22 | #include "video_core/shader/registry.h" | ||
| 23 | 23 | ||
| 24 | namespace VideoCommon::Shader { | 24 | namespace VideoCommon::Shader { |
| 25 | 25 | ||
| @@ -69,7 +69,7 @@ struct GlobalMemoryUsage { | |||
| 69 | class ShaderIR final { | 69 | class ShaderIR final { |
| 70 | public: | 70 | public: |
| 71 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, | 71 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 72 | ConstBufferLocker& locker); | 72 | Registry& registry); |
| 73 | ~ShaderIR(); | 73 | ~ShaderIR(); |
| 74 | 74 | ||
| 75 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | 75 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { |
| @@ -414,7 +414,7 @@ private: | |||
| 414 | const ProgramCode& program_code; | 414 | const ProgramCode& program_code; |
| 415 | const u32 main_offset; | 415 | const u32 main_offset; |
| 416 | const CompilerSettings settings; | 416 | const CompilerSettings settings; |
| 417 | ConstBufferLocker& locker; | 417 | Registry& registry; |
| 418 | 418 | ||
| 419 | bool decompiled{}; | 419 | bool decompiled{}; |
| 420 | bool disable_flow_stack{}; | 420 | bool disable_flow_stack{}; |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 15e22b9fa..10739b37d 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons | |||
| 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); | 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); |
| 82 | return {tracked, track}; | 82 | return {tracked, track}; |
| 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { | 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { |
| 84 | auto bound_buffer = locker.ObtainBoundBuffer(); | 84 | const u32 bound_buffer = registry.GetBoundBuffer(); |
| 85 | if (!bound_buffer) { | 85 | if (bound_buffer != cbuf->GetIndex()) { |
| 86 | return {}; | 86 | return {}; |
| 87 | } | 87 | } |
| 88 | if (*bound_buffer != cbuf->GetIndex()) { | 88 | const auto pair = DecoupleIndirectRead(*operation); |
| 89 | return {}; | ||
| 90 | } | ||
| 91 | auto pair = DecoupleIndirectRead(*operation); | ||
| 92 | if (!pair) { | 89 | if (!pair) { |
| 93 | return {}; | 90 | return {}; |
| 94 | } | 91 | } |
| 95 | auto [gpr, base_offset] = *pair; | 92 | auto [gpr, base_offset] = *pair; |
| 96 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); | 93 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); |
| 97 | auto gpu_driver = locker.AccessGuestDriverProfile(); | 94 | const auto& gpu_driver = registry.AccessGuestDriverProfile(); |
| 98 | if (gpu_driver == nullptr) { | ||
| 99 | return {}; | ||
| 100 | } | ||
| 101 | const u32 bindless_cv = NewCustomVariable(); | 95 | const u32 bindless_cv = NewCustomVariable(); |
| 102 | const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, | 96 | const Node op = |
| 103 | Immediate(gpu_driver->GetTextureHandlerSize())); | 97 | Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); |
| 104 | 98 | ||
| 105 | const Node cv_node = GetCustomVariable(bindless_cv); | 99 | const Node cv_node = GetCustomVariable(bindless_cv); |
| 106 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); | 100 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); |