From bd8b9bbcee93549f323352f227ff44d0e79e0ad4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 Feb 2020 16:13:47 -0300 Subject: gl_shader_cache: Rework shader cache and remove post-specializations Instead of pre-specializing shaders and then post-specializing them, drop the later and only "specialize" the shader while decoding it. --- src/video_core/shader/const_buffer_locker.cpp | 7 ++++--- src/video_core/shader/const_buffer_locker.h | 11 +++++------ src/video_core/shader/decode.cpp | 18 +++++------------- src/video_core/shader/track.cpp | 9 +++------ 4 files changed, 17 insertions(+), 28 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 0638be8cb..c859dd7ca 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -14,8 +14,9 @@ namespace VideoCommon::Shader { using Tegra::Engines::SamplerDescriptor; -ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) - : stage{shader_stage} {} +ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + VideoCore::GuestDriverProfile stored_guest_driver_profile) + : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine) @@ -97,7 +98,7 @@ void ConstBufferLocker::SetBoundBuffer(u32 buffer) { bool ConstBufferLocker::IsConsistent() const { if (!engine) { - return false; + return true; } return std::all_of(keys.begin(), keys.end(), [this](const auto& pair) { diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d3ea11087..7c6f7bbdd 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -26,7 +26,8 @@ using BindlessSamplerMap = */ class ConstBufferLocker { public: - explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + VideoCore::GuestDriverProfile stored_guest_driver_profile); explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine); @@ -83,15 +84,13 @@ public: } /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { - if (engine) { - return &engine->AccessGuestDriverProfile(); - } - return nullptr; + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { + return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; } private: const Tegra::Engines::ShaderType stage; + VideoCore::GuestDriverProfile stored_guest_driver_profile; Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; KeyMap keys; BoundSamplerMap bound_samplers; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 6b697ed5d..af4490d66 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } -void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, +void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, const std::list& used_samplers) { - if (gpu_driver == nullptr) { - LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); - return; - } - if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { + if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { return; } u32 count{}; @@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, bound_offsets.emplace_back(sampler.GetOffset()); } if (count > 1) { - gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); + gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); } } std::optional TryDeduceSamplerSize(const Sampler& sampler_to_deduce, - VideoCore::GuestDriverProfile* gpu_driver, + VideoCore::GuestDriverProfile& gpu_driver, const std::list& used_samplers) { - if (gpu_driver == nullptr) { - LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); - return std::nullopt; - } const u32 base_offset = sampler_to_deduce.GetOffset(); u32 max_offset{std::numeric_limits::max()}; for (const auto& sampler : used_samplers) { @@ -77,7 +69,7 @@ std::optional TryDeduceSamplerSize(const Sampler& sampler_to_deduce, if (max_offset == std::numeric_limits::max()) { return std::nullopt; } - return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); + return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); } } // Anonymous namespace diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 15e22b9fa..b1a0aa00c 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -94,13 +94,10 @@ std::tuple ShaderIR::TrackBindlessSampler(Node tracked, cons } auto [gpr, base_offset] = *pair; const auto offset_inm = std::get_if(&*base_offset); - auto gpu_driver = locker.AccessGuestDriverProfile(); - if (gpu_driver == nullptr) { - return {}; - } + const auto& gpu_driver = locker.AccessGuestDriverProfile(); const u32 bindless_cv = NewCustomVariable(); - const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, - Immediate(gpu_driver->GetTextureHandlerSize())); + const Node op = + Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); const Node cv_node = GetCustomVariable(bindless_cv); Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); -- cgit v1.2.3 From e8efd5a90100a86899e31a4de0137e915e0e0366 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 28 Feb 2020 20:53:10 -0300 Subject: video_core: Rename "const buffer locker" to "registry" --- src/video_core/shader/const_buffer_locker.cpp | 127 -------------------------- src/video_core/shader/const_buffer_locker.h | 102 --------------------- src/video_core/shader/control_flow.cpp | 13 +-- src/video_core/shader/control_flow.h | 3 +- src/video_core/shader/decode.cpp | 4 +- src/video_core/shader/decode/texture.cpp | 5 +- src/video_core/shader/registry.cpp | 127 ++++++++++++++++++++++++++ src/video_core/shader/registry.h | 102 +++++++++++++++++++++ src/video_core/shader/shader_ir.cpp | 5 +- src/video_core/shader/shader_ir.h | 6 +- src/video_core/shader/track.cpp | 4 +- 11 files changed, 251 insertions(+), 247 deletions(-) delete mode 100644 src/video_core/shader/const_buffer_locker.cpp delete mode 100644 src/video_core/shader/const_buffer_locker.h create mode 100644 src/video_core/shader/registry.cpp create mode 100644 src/video_core/shader/registry.h (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp deleted file mode 100644 index c859dd7ca..000000000 --- a/src/video_core/shader/const_buffer_locker.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/const_buffer_locker.h" - -namespace VideoCommon::Shader { - -using Tegra::Engines::SamplerDescriptor; - -ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - VideoCore::GuestDriverProfile stored_guest_driver_profile) - : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} - -ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine) - : stage{shader_stage}, engine{&engine} {} - -ConstBufferLocker::~ConstBufferLocker() = default; - -std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); - keys.emplace(key, value); - return value; -} - -std::optional ConstBufferLocker::ObtainBoundSampler(u32 offset) { - const u32 key = offset; - const auto iter = bound_samplers.find(key); - if (iter != bound_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); - bound_samplers.emplace(key, value); - return value; -} - -std::optional ConstBufferLocker::ObtainBindlessSampler( - u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = bindless_samplers.find(key); - if (iter != bindless_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); - bindless_samplers.emplace(key, value); - return value; -} - -std::optional ConstBufferLocker::ObtainBoundBuffer() { - if (bound_buffer_saved) { - return bound_buffer; - } - if (!engine) { - return std::nullopt; - } - bound_buffer_saved = true; - bound_buffer = engine->GetBoundBuffer(); - return bound_buffer; -} - -void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { - keys.insert_or_assign({buffer, offset}, value); -} - -void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { - bound_samplers.insert_or_assign(offset, sampler); -} - -void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { - bindless_samplers.insert_or_assign({buffer, offset}, sampler); -} - -void ConstBufferLocker::SetBoundBuffer(u32 buffer) { - bound_buffer_saved = true; - bound_buffer = buffer; -} - -bool ConstBufferLocker::IsConsistent() const { - if (!engine) { - return true; - } - return std::all_of(keys.begin(), keys.end(), - [this](const auto& pair) { - const auto [cbuf, offset] = pair.first; - const auto value = pair.second; - return value == engine->AccessConstBuffer32(stage, cbuf, offset); - }) && - std::all_of(bound_samplers.begin(), bound_samplers.end(), - [this](const auto& sampler) { - const auto [key, value] = sampler; - return value == engine->AccessBoundSampler(stage, key); - }) && - std::all_of(bindless_samplers.begin(), bindless_samplers.end(), - [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - return value == engine->AccessBindlessSampler(stage, cbuf, offset); - }); -} - -bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { - return std::tie(keys, bound_samplers, bindless_samplers) == - std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h deleted file mode 100644 index 7c6f7bbdd..000000000 --- a/src/video_core/shader/const_buffer_locker.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include "common/common_types.h" -#include "common/hash.h" -#include "video_core/engines/const_buffer_engine_interface.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" - -namespace VideoCommon::Shader { - -using KeyMap = std::unordered_map, u32, Common::PairHash>; -using BoundSamplerMap = std::unordered_map; -using BindlessSamplerMap = - std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; - -/** - * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader - * compiler. with it, the shader can obtain required data from GPU state and store it for disk - * shader compilation. - */ -class ConstBufferLocker { -public: - explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - VideoCore::GuestDriverProfile stored_guest_driver_profile); - - explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine); - - ~ConstBufferLocker(); - - /// Retrieves a key from the locker, if it's registered, it will give the registered value, if - /// not it will obtain it from maxwell3d and register it. - std::optional ObtainKey(u32 buffer, u32 offset); - - std::optional ObtainBoundSampler(u32 offset); - - std::optional ObtainBindlessSampler(u32 buffer, u32 offset); - - std::optional ObtainBoundBuffer(); - - /// Inserts a key. - void InsertKey(u32 buffer, u32 offset, u32 value); - - /// Inserts a bound sampler key. - void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Inserts a bindless sampler key. - void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Set the bound buffer for this locker. - void SetBoundBuffer(u32 buffer); - - /// Checks keys and samplers against engine's current const buffers. Returns true if they are - /// the same value, false otherwise; - bool IsConsistent() const; - - /// Returns true if the keys are equal to the other ones in the locker. - bool HasEqualKeys(const ConstBufferLocker& rhs) const; - - /// Gives an getter to the const buffer keys in the database. - const KeyMap& GetKeys() const { - return keys; - } - - /// Gets samplers database. - const BoundSamplerMap& GetBoundSamplers() const { - return bound_samplers; - } - - /// Gets bindless samplers database. - const BindlessSamplerMap& GetBindlessSamplers() const { - return bindless_samplers; - } - - /// Gets bound buffer used on this shader - u32 GetBoundBuffer() const { - return bound_buffer; - } - - /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { - return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; - } - -private: - const Tegra::Engines::ShaderType stage; - VideoCore::GuestDriverProfile stored_guest_driver_profile; - Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; - KeyMap keys; - BoundSamplerMap bound_samplers; - BindlessSamplerMap bindless_samplers; - bool bound_buffer_saved{}; - u32 bound_buffer{}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 0229733b6..2e2711350 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "video_core/shader/ast.h" #include "video_core/shader/control_flow.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -64,11 +65,11 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) - : program_code{program_code}, locker{locker}, start{start} {} + explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry) + : program_code{program_code}, registry{registry}, start{start} {} const ProgramCode& program_code; - ConstBufferLocker& locker; + Registry& registry; u32 start{}; std::vector block_info; std::list inspect_queries; @@ -438,7 +439,7 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) const s32 pc_target = offset + result.relative_position; std::vector branches; for (u32 i = 0; i < result.entries; i++) { - auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); + auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); if (!key) { return {ParseResult::AbnormalFlow, parse_info}; } @@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) { std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, - ConstBufferLocker& locker) { + Registry& registry) { auto result_out = std::make_unique(); if (settings.depth == CompileDepth::BruteForce) { result_out->settings.depth = CompileDepth::BruteForce; return result_out; } - CFGRebuildState state{program_code, start_address, locker}; + CFGRebuildState state{program_code, start_address, registry}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5304998b9..62a3510d8 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -12,6 +12,7 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/ast.h" #include "video_core/shader/compiler_settings.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -111,6 +112,6 @@ struct ShaderCharacteristics { std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, - ConstBufferLocker& locker); + Registry& registry); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index af4490d66..87ac9ac6c 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -141,7 +141,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; - auto info = ScanFlow(program_code, main_offset, settings, locker); + auto info = ScanFlow(program_code, main_offset, settings, registry); auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; @@ -356,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { void ShaderIR::PostDecode() { // Deduce texture handler size if needed - auto gpu_driver = locker.AccessGuestDriverProfile(); + auto gpu_driver = registry.AccessGuestDriverProfile(); DeduceTextureHandlerSize(gpu_driver, used_samplers); // Deduce Indexed Samplers if (!uses_indexed_samplers) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index bee7d8cad..48350e042 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -12,6 +12,7 @@ #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional sample if (sampler_info) { return *sampler_info; } - const auto sampler = - buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); + const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset) + : registry.ObtainBoundSampler(offset); if (!sampler) { LOG_WARNING(HW_GPU, "Unknown sampler info"); return SamplerInfo{TextureType::Texture2D, false, false, false}; diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp new file mode 100644 index 000000000..7126caf98 --- /dev/null +++ b/src/video_core/shader/registry.cpp @@ -0,0 +1,127 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" +#include "video_core/shader/registry.h" + +namespace VideoCommon::Shader { + +using Tegra::Engines::SamplerDescriptor; + +Registry::Registry(Tegra::Engines::ShaderType shader_stage, + VideoCore::GuestDriverProfile stored_guest_driver_profile) + : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} + +Registry::Registry(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface& engine) + : stage{shader_stage}, engine{&engine} {} + +Registry::~Registry() = default; + +std::optional Registry::ObtainKey(u32 buffer, u32 offset) { + const std::pair key = {buffer, offset}; + const auto iter = keys.find(key); + if (iter != keys.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); + keys.emplace(key, value); + return value; +} + +std::optional Registry::ObtainBoundSampler(u32 offset) { + const u32 key = offset; + const auto iter = bound_samplers.find(key); + if (iter != bound_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); + bound_samplers.emplace(key, value); + return value; +} + +std::optional Registry::ObtainBindlessSampler(u32 buffer, + u32 offset) { + const std::pair key = {buffer, offset}; + const auto iter = bindless_samplers.find(key); + if (iter != bindless_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); + bindless_samplers.emplace(key, value); + return value; +} + +std::optional Registry::ObtainBoundBuffer() { + if (bound_buffer_saved) { + return bound_buffer; + } + if (!engine) { + return std::nullopt; + } + bound_buffer_saved = true; + bound_buffer = engine->GetBoundBuffer(); + return bound_buffer; +} + +void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { + keys.insert_or_assign({buffer, offset}, value); +} + +void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { + bound_samplers.insert_or_assign(offset, sampler); +} + +void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { + bindless_samplers.insert_or_assign({buffer, offset}, sampler); +} + +void Registry::SetBoundBuffer(u32 buffer) { + bound_buffer_saved = true; + bound_buffer = buffer; +} + +bool Registry::IsConsistent() const { + if (!engine) { + return true; + } + return std::all_of(keys.begin(), keys.end(), + [this](const auto& pair) { + const auto [cbuf, offset] = pair.first; + const auto value = pair.second; + return value == engine->AccessConstBuffer32(stage, cbuf, offset); + }) && + std::all_of(bound_samplers.begin(), bound_samplers.end(), + [this](const auto& sampler) { + const auto [key, value] = sampler; + return value == engine->AccessBoundSampler(stage, key); + }) && + std::all_of(bindless_samplers.begin(), bindless_samplers.end(), + [this](const auto& sampler) { + const auto [cbuf, offset] = sampler.first; + const auto value = sampler.second; + return value == engine->AccessBindlessSampler(stage, cbuf, offset); + }); +} + +bool Registry::HasEqualKeys(const Registry& rhs) const { + return std::tie(keys, bound_samplers, bindless_samplers) == + std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h new file mode 100644 index 000000000..a5487e1d7 --- /dev/null +++ b/src/video_core/shader/registry.h @@ -0,0 +1,102 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "common/common_types.h" +#include "common/hash.h" +#include "video_core/engines/const_buffer_engine_interface.h" +#include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" + +namespace VideoCommon::Shader { + +using KeyMap = std::unordered_map, u32, Common::PairHash>; +using BoundSamplerMap = std::unordered_map; +using BindlessSamplerMap = + std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; + +/** + * The Registry is a class use to interface the 3D and compute engines with the shader compiler. + * With it, the shader can obtain required data from GPU state and store it for disk shader + * compilation. + */ +class Registry { +public: + explicit Registry(Tegra::Engines::ShaderType shader_stage, + VideoCore::GuestDriverProfile stored_guest_driver_profile); + + explicit Registry(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface& engine); + + ~Registry(); + + /// Retrieves a key from the registry, if it's registered, it will give the registered value, if + /// not it will obtain it from maxwell3d and register it. + std::optional ObtainKey(u32 buffer, u32 offset); + + std::optional ObtainBoundSampler(u32 offset); + + std::optional ObtainBindlessSampler(u32 buffer, u32 offset); + + std::optional ObtainBoundBuffer(); + + /// Inserts a key. + void InsertKey(u32 buffer, u32 offset, u32 value); + + /// Inserts a bound sampler key. + void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Inserts a bindless sampler key. + void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Set the bound buffer for this registry. + void SetBoundBuffer(u32 buffer); + + /// Checks keys and samplers against engine's current const buffers. + /// Returns true if they are the same value, false otherwise. + bool IsConsistent() const; + + /// Returns true if the keys are equal to the other ones in the registry. + bool HasEqualKeys(const Registry& rhs) const; + + /// Gives an getter to the const buffer keys in the database. + const KeyMap& GetKeys() const { + return keys; + } + + /// Gets samplers database. + const BoundSamplerMap& GetBoundSamplers() const { + return bound_samplers; + } + + /// Gets bindless samplers database. + const BindlessSamplerMap& GetBindlessSamplers() const { + return bindless_samplers; + } + + /// Gets bound buffer used on this shader + u32 GetBoundBuffer() const { + return bound_buffer; + } + + /// Obtains access to the guest driver's profile. + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { + return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; + } + +private: + const Tegra::Engines::ShaderType stage; + VideoCore::GuestDriverProfile stored_guest_driver_profile; + Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; + KeyMap keys; + BoundSamplerMap bound_samplers; + BindlessSamplerMap bindless_samplers; + bool bound_buffer_saved{}; + u32 bound_buffer{}; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 3a5d280a9..425927777 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -11,6 +11,7 @@ #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation; using Tegra::Shader::Register; ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, - ConstBufferLocker& locker) - : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { + Registry& registry) + : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} { Decode(); PostDecode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index b0851c3be..dde036b40 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -18,8 +18,8 @@ #include "video_core/engines/shader_header.h" #include "video_core/shader/ast.h" #include "video_core/shader/compiler_settings.h" -#include "video_core/shader/const_buffer_locker.h" #include "video_core/shader/node.h" +#include "video_core/shader/registry.h" namespace VideoCommon::Shader { @@ -69,7 +69,7 @@ struct GlobalMemoryUsage { class ShaderIR final { public: explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, - ConstBufferLocker& locker); + Registry& registry); ~ShaderIR(); const std::map& GetBasicBlocks() const { @@ -414,7 +414,7 @@ private: const ProgramCode& program_code; const u32 main_offset; const CompilerSettings settings; - ConstBufferLocker& locker; + Registry& registry; bool decompiled{}; bool disable_flow_stack{}; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index b1a0aa00c..831219841 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -81,7 +81,7 @@ std::tuple ShaderIR::TrackBindlessSampler(Node tracked, cons MakeTrackSampler(cbuf->GetIndex(), immediate->GetValue()); return {tracked, track}; } else if (const auto operation = std::get_if(&*offset)) { - auto bound_buffer = locker.ObtainBoundBuffer(); + const auto bound_buffer = registry.ObtainBoundBuffer(); if (!bound_buffer) { return {}; } @@ -94,7 +94,7 @@ std::tuple ShaderIR::TrackBindlessSampler(Node tracked, cons } auto [gpr, base_offset] = *pair; const auto offset_inm = std::get_if(&*base_offset); - const auto& gpu_driver = locker.AccessGuestDriverProfile(); + const auto& gpu_driver = registry.AccessGuestDriverProfile(); const u32 bindless_cv = NewCustomVariable(); const Node op = Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); -- cgit v1.2.3 From 0528be5c92db67b608dc64322c55e57629c80619 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Feb 2020 03:49:51 -0300 Subject: shader/registry: Store graphics and compute metadata Store information GLSL forces us to provide but it's dynamic state in hardware (workgroup sizes, primitive topology, shared memory size). --- src/video_core/shader/registry.cpp | 59 ++++++++++++++++++++++++-------------- src/video_core/shader/registry.h | 49 +++++++++++++++++++++++++------ src/video_core/shader/track.cpp | 9 ++---- 3 files changed, 81 insertions(+), 36 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index 7126caf98..dc2d3dce3 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -6,21 +6,55 @@ #include #include "common/common_types.h" +#include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/shader/registry.h" namespace VideoCommon::Shader { +using Tegra::Engines::ConstBufferEngineInterface; using Tegra::Engines::SamplerDescriptor; +using Tegra::Engines::ShaderType; -Registry::Registry(Tegra::Engines::ShaderType shader_stage, - VideoCore::GuestDriverProfile stored_guest_driver_profile) - : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} +namespace { + +GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { + if (shader_stage == ShaderType::Compute) { + return {}; + } + auto& graphics = static_cast(engine); + + GraphicsInfo info; + info.primitive_topology = graphics.regs.draw.topology; + return info; +} + +ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { + if (shader_stage != ShaderType::Compute) { + return {}; + } + auto& compute = static_cast(engine); + const auto& launch = compute.launch_description; + + ComputeInfo info; + info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; + info.local_memory_size_in_words = launch.local_pos_alloc; + info.shared_memory_size_in_words = launch.shared_alloc; + return info; +} + +} // Anonymous namespace + +Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) + : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, + bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} Registry::Registry(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine) - : stage{shader_stage}, engine{&engine} {} + : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, + graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( + shader_stage, engine)} {} Registry::~Registry() = default; @@ -67,18 +101,6 @@ std::optional Registry::ObtainBindlessSampler return value; } -std::optional Registry::ObtainBoundBuffer() { - if (bound_buffer_saved) { - return bound_buffer; - } - if (!engine) { - return std::nullopt; - } - bound_buffer_saved = true; - bound_buffer = engine->GetBoundBuffer(); - return bound_buffer; -} - void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { keys.insert_or_assign({buffer, offset}, value); } @@ -91,11 +113,6 @@ void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor s bindless_samplers.insert_or_assign({buffer, offset}, sampler); } -void Registry::SetBoundBuffer(u32 buffer) { - bound_buffer_saved = true; - bound_buffer = buffer; -} - bool Registry::IsConsistent() const { if (!engine) { return true; diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index a5487e1d7..c1a04ea02 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -4,11 +4,16 @@ #pragma once +#include #include +#include #include +#include + #include "common/common_types.h" #include "common/hash.h" #include "video_core/engines/const_buffer_engine_interface.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/guest_driver.h" @@ -19,6 +24,25 @@ using BoundSamplerMap = std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; +struct GraphicsInfo { + Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology primitive_topology{}; +}; +static_assert(std::is_trivially_copyable_v); + +struct ComputeInfo { + std::array workgroup_size{}; + u32 shared_memory_size_in_words = 0; + u32 local_memory_size_in_words = 0; +}; +static_assert(std::is_trivially_copyable_v); + +struct SerializedRegistryInfo { + VideoCore::GuestDriverProfile guest_driver_profile; + u32 bound_buffer = 0; + GraphicsInfo graphics; + ComputeInfo compute; +}; + /** * The Registry is a class use to interface the 3D and compute engines with the shader compiler. * With it, the shader can obtain required data from GPU state and store it for disk shader @@ -26,8 +50,7 @@ using BindlessSamplerMap = */ class Registry { public: - explicit Registry(Tegra::Engines::ShaderType shader_stage, - VideoCore::GuestDriverProfile stored_guest_driver_profile); + explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); explicit Registry(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine); @@ -42,8 +65,6 @@ public: std::optional ObtainBindlessSampler(u32 buffer, u32 offset); - std::optional ObtainBoundBuffer(); - /// Inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); @@ -53,9 +74,6 @@ public: /// Inserts a bindless sampler key. void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - /// Set the bound buffer for this registry. - void SetBoundBuffer(u32 buffer); - /// Checks keys and samplers against engine's current const buffers. /// Returns true if they are the same value, false otherwise. bool IsConsistent() const; @@ -83,6 +101,18 @@ public: return bound_buffer; } + /// Returns compute information from this shader + const GraphicsInfo& GetGraphicsInfo() const { + ASSERT(stage != Tegra::Engines::ShaderType::Compute); + return graphics_info; + } + + /// Returns compute information from this shader + const ComputeInfo& GetComputeInfo() const { + ASSERT(stage == Tegra::Engines::ShaderType::Compute); + return compute_info; + } + /// Obtains access to the guest driver's profile. VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; @@ -95,8 +125,9 @@ private: KeyMap keys; BoundSamplerMap bound_samplers; BindlessSamplerMap bindless_samplers; - bool bound_buffer_saved{}; - u32 bound_buffer{}; + u32 bound_buffer; + GraphicsInfo graphics_info; + ComputeInfo compute_info; }; } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 831219841..10739b37d 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -81,14 +81,11 @@ std::tuple ShaderIR::TrackBindlessSampler(Node tracked, cons MakeTrackSampler(cbuf->GetIndex(), immediate->GetValue()); return {tracked, track}; } else if (const auto operation = std::get_if(&*offset)) { - const auto bound_buffer = registry.ObtainBoundBuffer(); - if (!bound_buffer) { + const u32 bound_buffer = registry.GetBoundBuffer(); + if (bound_buffer != cbuf->GetIndex()) { return {}; } - if (*bound_buffer != cbuf->GetIndex()) { - return {}; - } - auto pair = DecoupleIndirectRead(*operation); + const auto pair = DecoupleIndirectRead(*operation); if (!pair) { return {}; } -- cgit v1.2.3 From 66a8a3e88719aaa65a96dd0289e1fb151d199d9b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Feb 2020 04:03:22 -0300 Subject: shader/registry: Cache tessellation state --- src/video_core/shader/registry.cpp | 3 +++ src/video_core/shader/registry.h | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index dc2d3dce3..90dfab293 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -27,6 +27,9 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac GraphicsInfo info; info.primitive_topology = graphics.regs.draw.topology; + info.tessellation_primitive = graphics.regs.tess_mode.prim; + info.tessellation_spacing = graphics.regs.tess_mode.spacing; + info.tessellation_clockwise = graphics.regs.tess_mode.cw; return info; } diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index c1a04ea02..7b7fad3d1 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -26,15 +26,19 @@ using BindlessSamplerMap = struct GraphicsInfo { Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology primitive_topology{}; + Tegra::Engines::Maxwell3D::Regs::TessellationPrimitive tessellation_primitive{}; + Tegra::Engines::Maxwell3D::Regs::TessellationSpacing tessellation_spacing{}; + bool tessellation_clockwise = false; }; -static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_copyable_v && + std::is_standard_layout_v); struct ComputeInfo { std::array workgroup_size{}; u32 shared_memory_size_in_words = 0; u32 local_memory_size_in_words = 0; }; -static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_copyable_v && std::is_standard_layout_v); struct SerializedRegistryInfo { VideoCore::GuestDriverProfile guest_driver_profile; -- cgit v1.2.3 From b1acb4f73f79a555480d1405bc9732cab111f6e2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 2 Mar 2020 01:08:10 -0300 Subject: shader/registry: Address feedback --- src/video_core/shader/registry.cpp | 11 +++++++++++ src/video_core/shader/registry.h | 18 ++++++------------ 2 files changed, 17 insertions(+), 12 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index 90dfab293..4a1e16c1e 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -5,6 +5,7 @@ #include #include +#include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" @@ -144,4 +145,14 @@ bool Registry::HasEqualKeys(const Registry& rhs) const { std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); } +const GraphicsInfo& Registry::GetGraphicsInfo() const { + ASSERT(stage != Tegra::Engines::ShaderType::Compute); + return graphics_info; +} + +const ComputeInfo& Registry::GetComputeInfo() const { + ASSERT(stage == Tegra::Engines::ShaderType::Compute); + return compute_info; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index 7b7fad3d1..07998c4db 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -85,6 +85,12 @@ public: /// Returns true if the keys are equal to the other ones in the registry. bool HasEqualKeys(const Registry& rhs) const; + /// Returns graphics information from this shader + const GraphicsInfo& GetGraphicsInfo() const; + + /// Returns compute information from this shader + const ComputeInfo& GetComputeInfo() const; + /// Gives an getter to the const buffer keys in the database. const KeyMap& GetKeys() const { return keys; @@ -105,18 +111,6 @@ public: return bound_buffer; } - /// Returns compute information from this shader - const GraphicsInfo& GetGraphicsInfo() const { - ASSERT(stage != Tegra::Engines::ShaderType::Compute); - return graphics_info; - } - - /// Returns compute information from this shader - const ComputeInfo& GetComputeInfo() const { - ASSERT(stage == Tegra::Engines::ShaderType::Compute); - return compute_info; - } - /// Obtains access to the guest driver's profile. VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; -- cgit v1.2.3 From eb5861e0a22851cd2b2ca38136bfc7870790836e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 2 Mar 2020 01:54:00 -0300 Subject: engines/maxwell_3d: Add TFB registers and store them in shader registry --- src/video_core/shader/registry.cpp | 3 +++ src/video_core/shader/registry.h | 12 +++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index 4a1e16c1e..af70b3f35 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -27,9 +27,12 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac auto& graphics = static_cast(engine); GraphicsInfo info; + info.tfb_layouts = graphics.regs.tfb_layouts; + info.tfb_varying_locs = graphics.regs.tfb_varying_locs; info.primitive_topology = graphics.regs.draw.topology; info.tessellation_primitive = graphics.regs.tess_mode.prim; info.tessellation_spacing = graphics.regs.tess_mode.spacing; + info.tfb_enabled = graphics.regs.tfb_enabled; info.tessellation_clockwise = graphics.regs.tess_mode.cw; return info; } diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index 07998c4db..0c80d35fd 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -25,9 +25,15 @@ using BindlessSamplerMap = std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; struct GraphicsInfo { - Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology primitive_topology{}; - Tegra::Engines::Maxwell3D::Regs::TessellationPrimitive tessellation_primitive{}; - Tegra::Engines::Maxwell3D::Regs::TessellationSpacing tessellation_spacing{}; + using Maxwell = Tegra::Engines::Maxwell3D::Regs; + + std::array + tfb_layouts{}; + std::array, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; + Maxwell::PrimitiveTopology primitive_topology{}; + Maxwell::TessellationPrimitive tessellation_primitive{}; + Maxwell::TessellationSpacing tessellation_spacing{}; + bool tfb_enabled = false; bool tessellation_clockwise = false; }; static_assert(std::is_trivially_copyable_v && -- cgit v1.2.3