diff options
| author | 2020-02-01 20:41:40 -0500 | |
|---|---|---|
| committer | 2020-02-01 20:41:40 -0500 | |
| commit | b5bbe7e752d5d36839a86638bfaa4b4c348497cd (patch) | |
| tree | b16b3f8ce5ec6233f9f822ad56418d74f0cd47ae /src/video_core/shader | |
| parent | Merge pull request #3268 from CJBok/deadzone (diff) | |
| parent | Shader_IR: Address feedback. (diff) | |
| download | yuzu-b5bbe7e752d5d36839a86638bfaa4b4c348497cd.tar.gz yuzu-b5bbe7e752d5d36839a86638bfaa4b4c348497cd.tar.xz yuzu-b5bbe7e752d5d36839a86638bfaa4b4c348497cd.zip | |
Merge pull request #3282 from FernandoS27/indexed-samplers
Partially implement Indexed samplers in general and specific code in GLSL
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.h | 21 | ||||
| -rw-r--r-- | src/video_core/shader/decode.cpp | 68 | ||||
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 110 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 87 | ||||
| -rw-r--r-- | src/video_core/shader/node_helper.h | 6 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 16 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 106 |
9 files changed, 397 insertions, 43 deletions
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index a4a0319eb..0638be8cb 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp | |||
| @@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle | |||
| 66 | return value; | 66 | return value; |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { | ||
| 70 | if (bound_buffer_saved) { | ||
| 71 | return bound_buffer; | ||
| 72 | } | ||
| 73 | if (!engine) { | ||
| 74 | return std::nullopt; | ||
| 75 | } | ||
| 76 | bound_buffer_saved = true; | ||
| 77 | bound_buffer = engine->GetBoundBuffer(); | ||
| 78 | return bound_buffer; | ||
| 79 | } | ||
| 80 | |||
| 69 | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { | 81 | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { |
| 70 | keys.insert_or_assign({buffer, offset}, value); | 82 | keys.insert_or_assign({buffer, offset}, value); |
| 71 | } | 83 | } |
| @@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes | |||
| 78 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | 90 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); |
| 79 | } | 91 | } |
| 80 | 92 | ||
| 93 | void ConstBufferLocker::SetBoundBuffer(u32 buffer) { | ||
| 94 | bound_buffer_saved = true; | ||
| 95 | bound_buffer = buffer; | ||
| 96 | } | ||
| 97 | |||
| 81 | bool ConstBufferLocker::IsConsistent() const { | 98 | bool ConstBufferLocker::IsConsistent() const { |
| 82 | if (!engine) { | 99 | if (!engine) { |
| 83 | return false; | 100 | return false; |
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d32e2d657..d3ea11087 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/hash.h" | 10 | #include "common/hash.h" |
| 11 | #include "video_core/engines/const_buffer_engine_interface.h" | 11 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 12 | #include "video_core/engines/shader_type.h" | 12 | #include "video_core/engines/shader_type.h" |
| 13 | #include "video_core/guest_driver.h" | ||
| 13 | 14 | ||
| 14 | namespace VideoCommon::Shader { | 15 | namespace VideoCommon::Shader { |
| 15 | 16 | ||
| @@ -40,6 +41,8 @@ public: | |||
| 40 | 41 | ||
| 41 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | 42 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); |
| 42 | 43 | ||
| 44 | std::optional<u32> ObtainBoundBuffer(); | ||
| 45 | |||
| 43 | /// Inserts a key. | 46 | /// Inserts a key. |
| 44 | void InsertKey(u32 buffer, u32 offset, u32 value); | 47 | void InsertKey(u32 buffer, u32 offset, u32 value); |
| 45 | 48 | ||
| @@ -49,6 +52,9 @@ public: | |||
| 49 | /// Inserts a bindless sampler key. | 52 | /// Inserts a bindless sampler key. |
| 50 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | 53 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); |
| 51 | 54 | ||
| 55 | /// Set the bound buffer for this locker. | ||
| 56 | void SetBoundBuffer(u32 buffer); | ||
| 57 | |||
| 52 | /// Checks keys and samplers against engine's current const buffers. Returns true if they are | 58 | /// Checks keys and samplers against engine's current const buffers. Returns true if they are |
| 53 | /// the same value, false otherwise. | 59 | /// the same value, false otherwise. |
| 54 | bool IsConsistent() const; | 60 | bool IsConsistent() const; |
| @@ -71,12 +77,27 @@ public: | |||
| 71 | return bindless_samplers; | 77 | return bindless_samplers; |
| 72 | } | 78 | } |
| 73 | 79 | ||
| 80 | /// Gets bound buffer used on this shader | ||
| 81 | u32 GetBoundBuffer() const { | ||
| 82 | return bound_buffer; | ||
| 83 | } | ||
| 84 | |||
| 85 | /// Obtains access to the guest driver's profile. | ||
| 86 | VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { | ||
| 87 | if (engine) { | ||
| 88 | return &engine->AccessGuestDriverProfile(); | ||
| 89 | } | ||
| 90 | return nullptr; | ||
| 91 | } | ||
| 92 | |||
| 74 | private: | 93 | private: |
| 75 | const Tegra::Engines::ShaderType stage; | 94 | const Tegra::Engines::ShaderType stage; |
| 76 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | 95 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; |
| 77 | KeyMap keys; | 96 | KeyMap keys; |
| 78 | BoundSamplerMap bound_samplers; | 97 | BoundSamplerMap bound_samplers; |
| 79 | BindlessSamplerMap bindless_samplers; | 98 | BindlessSamplerMap bindless_samplers; |
| 99 | bool bound_buffer_saved{}; | ||
| 100 | u32 bound_buffer{}; | ||
| 80 | }; | 101 | }; |
| 81 | 102 | ||
| 82 | } // namespace VideoCommon::Shader | 103 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 22c3e5120..6b697ed5d 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include <limits> | ||
| 6 | #include <set> | 7 | #include <set> |
| 7 | 8 | ||
| 8 | #include <fmt/format.h> | 9 | #include <fmt/format.h> |
| @@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 33 | return (absolute_offset % SchedPeriod) == 0; | 34 | return (absolute_offset % SchedPeriod) == 0; |
| 34 | } | 35 | } |
| 35 | 36 | ||
| 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | ||
| 38 | const std::list<Sampler>& used_samplers) { | ||
| 39 | if (gpu_driver == nullptr) { | ||
| 40 | LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); | ||
| 41 | return; | ||
| 42 | } | ||
| 43 | if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||
| 44 | return; | ||
| 45 | } | ||
| 46 | u32 count{}; | ||
| 47 | std::vector<u32> bound_offsets; | ||
| 48 | for (const auto& sampler : used_samplers) { | ||
| 49 | if (sampler.IsBindless()) { | ||
| 50 | continue; | ||
| 51 | } | ||
| 52 | ++count; | ||
| 53 | bound_offsets.emplace_back(sampler.GetOffset()); | ||
| 54 | } | ||
| 55 | if (count > 1) { | ||
| 56 | gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | |||
| 60 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | ||
| 61 | VideoCore::GuestDriverProfile* gpu_driver, | ||
| 62 | const std::list<Sampler>& used_samplers) { | ||
| 63 | if (gpu_driver == nullptr) { | ||
| 64 | LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); | ||
| 65 | return std::nullopt; | ||
| 66 | } | ||
| 67 | const u32 base_offset = sampler_to_deduce.GetOffset(); | ||
| 68 | u32 max_offset{std::numeric_limits<u32>::max()}; | ||
| 69 | for (const auto& sampler : used_samplers) { | ||
| 70 | if (sampler.IsBindless()) { | ||
| 71 | continue; | ||
| 72 | } | ||
| 73 | if (sampler.GetOffset() > base_offset) { | ||
| 74 | max_offset = std::min(sampler.GetOffset(), max_offset); | ||
| 75 | } | ||
| 76 | } | ||
| 77 | if (max_offset == std::numeric_limits<u32>::max()) { | ||
| 78 | return std::nullopt; | ||
| 79 | } | ||
| 80 | return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); | ||
| 81 | } | ||
| 82 | |||
| 36 | } // Anonymous namespace | 83 | } // Anonymous namespace |
| 37 | 84 | ||
| 38 | class ASTDecoder { | 85 | class ASTDecoder { |
| @@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 315 | return pc + 1; | 362 | return pc + 1; |
| 316 | } | 363 | } |
| 317 | 364 | ||
| 365 | void ShaderIR::PostDecode() { | ||
| 366 | // Deduce texture handler size if needed | ||
| 367 | auto gpu_driver = locker.AccessGuestDriverProfile(); | ||
| 368 | DeduceTextureHandlerSize(gpu_driver, used_samplers); | ||
| 369 | // Deduce Indexed Samplers | ||
| 370 | if (!uses_indexed_samplers) { | ||
| 371 | return; | ||
| 372 | } | ||
| 373 | for (auto& sampler : used_samplers) { | ||
| 374 | if (!sampler.IsIndexed()) { | ||
| 375 | continue; | ||
| 376 | } | ||
| 377 | if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { | ||
| 378 | sampler.SetSize(*size); | ||
| 379 | } else { | ||
| 380 | LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); | ||
| 381 | sampler.SetSize(1); | ||
| 382 | } | ||
| 383 | } | ||
| 384 | } | ||
| 385 | |||
| 318 | } // namespace VideoCommon::Shader | 386 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b567e39d..d980535b1 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 144 | Node4 values; | 144 | Node4 values; |
| 145 | for (u32 element = 0; element < values.size(); ++element) { | 145 | for (u32 element = 0; element < values.size(); ++element) { |
| 146 | auto coords_copy = coords; | 146 | auto coords_copy = coords; |
| 147 | MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; | 147 | MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, |
| 148 | {}, {}, component, element, {}}; | ||
| 148 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | 149 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 149 | } | 150 | } |
| 150 | 151 | ||
| @@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 167 | const auto derivate_reg = instr.gpr20.Value(); | 168 | const auto derivate_reg = instr.gpr20.Value(); |
| 168 | const auto texture_type = instr.txd.texture_type.Value(); | 169 | const auto texture_type = instr.txd.texture_type.Value(); |
| 169 | const auto coord_count = GetCoordCount(texture_type); | 170 | const auto coord_count = GetCoordCount(texture_type); |
| 170 | 171 | Node index_var{}; | |
| 171 | const Sampler* sampler = | 172 | const Sampler* sampler = |
| 172 | is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}}) | 173 | is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}}) |
| 173 | : GetSampler(instr.sampler, {{texture_type, is_array, false}}); | 174 | : GetSampler(instr.sampler, {{texture_type, is_array, false}}); |
| 174 | Node4 values; | 175 | Node4 values; |
| 175 | if (sampler == nullptr) { | 176 | if (sampler == nullptr) { |
| @@ -200,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 200 | } | 201 | } |
| 201 | 202 | ||
| 202 | for (u32 element = 0; element < values.size(); ++element) { | 203 | for (u32 element = 0; element < values.size(); ++element) { |
| 203 | MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element}; | 204 | MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, |
| 205 | {}, {}, {}, element, index_var}; | ||
| 204 | values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); | 206 | values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); |
| 205 | } | 207 | } |
| 206 | 208 | ||
| @@ -215,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 215 | // TODO: The new commits on the texture refactor, change the way samplers work. | 217 | // TODO: The new commits on the texture refactor, change the way samplers work. |
| 216 | // Sadly, not all texture instructions specify the type of texture their sampler | 218 | // Sadly, not all texture instructions specify the type of texture their sampler |
| 217 | // uses. This must be fixed at a later instance. | 219 | // uses. This must be fixed at a later instance. |
| 220 | Node index_var{}; | ||
| 218 | const Sampler* sampler = | 221 | const Sampler* sampler = |
| 219 | is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); | 222 | is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler); |
| 220 | 223 | ||
| 221 | if (sampler == nullptr) { | 224 | if (sampler == nullptr) { |
| 222 | u32 indexer = 0; | 225 | u32 indexer = 0; |
| @@ -240,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 240 | if (!instr.txq.IsComponentEnabled(element)) { | 243 | if (!instr.txq.IsComponentEnabled(element)) { |
| 241 | continue; | 244 | continue; |
| 242 | } | 245 | } |
| 243 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; | 246 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; |
| 244 | const Node value = | 247 | const Node value = |
| 245 | Operation(OperationCode::TextureQueryDimensions, meta, | 248 | Operation(OperationCode::TextureQueryDimensions, meta, |
| 246 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | 249 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); |
| @@ -266,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 266 | 269 | ||
| 267 | auto texture_type = instr.tmml.texture_type.Value(); | 270 | auto texture_type = instr.tmml.texture_type.Value(); |
| 268 | const bool is_array = instr.tmml.array != 0; | 271 | const bool is_array = instr.tmml.array != 0; |
| 272 | Node index_var{}; | ||
| 269 | const Sampler* sampler = | 273 | const Sampler* sampler = |
| 270 | is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); | 274 | is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler); |
| 271 | 275 | ||
| 272 | if (sampler == nullptr) { | 276 | if (sampler == nullptr) { |
| 273 | u32 indexer = 0; | 277 | u32 indexer = 0; |
| @@ -309,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 309 | continue; | 313 | continue; |
| 310 | } | 314 | } |
| 311 | auto params = coords; | 315 | auto params = coords; |
| 312 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; | 316 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; |
| 313 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | 317 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 314 | SetTemporary(bb, indexer++, value); | 318 | SetTemporary(bb, indexer++, value); |
| 315 | } | 319 | } |
| @@ -383,37 +387,65 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, | |||
| 383 | // Otherwise create a new mapping for this sampler | 387 | // Otherwise create a new mapping for this sampler |
| 384 | const auto next_index = static_cast<u32>(used_samplers.size()); | 388 | const auto next_index = static_cast<u32>(used_samplers.size()); |
| 385 | return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, | 389 | return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, |
| 386 | info.is_buffer); | 390 | info.is_buffer, false); |
| 387 | } | 391 | } |
| 388 | 392 | ||
| 389 | const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | 393 | const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, |
| 390 | std::optional<SamplerInfo> sampler_info) { | 394 | std::optional<SamplerInfo> sampler_info) { |
| 391 | const Node sampler_register = GetRegister(reg); | 395 | const Node sampler_register = GetRegister(reg); |
| 392 | const auto [base_sampler, buffer, offset] = | 396 | const auto [base_node, tracked_sampler_info] = |
| 393 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | 397 | TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| 394 | ASSERT(base_sampler != nullptr); | 398 | ASSERT(base_node != nullptr); |
| 395 | if (base_sampler == nullptr) { | 399 | if (base_node == nullptr) { |
| 396 | return nullptr; | 400 | return nullptr; |
| 397 | } | 401 | } |
| 398 | 402 | ||
| 399 | const auto info = GetSamplerInfo(sampler_info, offset, buffer); | 403 | if (const auto bindless_sampler_info = |
| 404 | std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { | ||
| 405 | const u32 buffer = bindless_sampler_info->GetIndex(); | ||
| 406 | const u32 offset = bindless_sampler_info->GetOffset(); | ||
| 407 | const auto info = GetSamplerInfo(sampler_info, offset, buffer); | ||
| 408 | |||
| 409 | // If this sampler has already been used, return the existing mapping. | ||
| 410 | const auto it = | ||
| 411 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 412 | [buffer = buffer, offset = offset](const Sampler& entry) { | ||
| 413 | return entry.GetBuffer() == buffer && entry.GetOffset() == offset; | ||
| 414 | }); | ||
| 415 | if (it != used_samplers.end()) { | ||
| 416 | ASSERT(it->IsBindless() && it->GetType() == info.type && | ||
| 417 | it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow); | ||
| 418 | return &*it; | ||
| 419 | } | ||
| 400 | 420 | ||
| 401 | // If this sampler has already been used, return the existing mapping. | 421 | // Otherwise create a new mapping for this sampler |
| 402 | const auto it = | 422 | const auto next_index = static_cast<u32>(used_samplers.size()); |
| 403 | std::find_if(used_samplers.begin(), used_samplers.end(), | 423 | return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, |
| 404 | [buffer = buffer, offset = offset](const Sampler& entry) { | 424 | info.is_shadow, info.is_buffer, false); |
| 405 | return entry.GetBuffer() == buffer && entry.GetOffset() == offset; | 425 | } else if (const auto array_sampler_info = |
| 406 | }); | 426 | std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { |
| 407 | if (it != used_samplers.end()) { | 427 | const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; |
| 408 | ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && | 428 | index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); |
| 409 | it->IsShadow() == info.is_shadow); | 429 | const auto info = GetSamplerInfo(sampler_info, base_offset); |
| 410 | return &*it; | 430 | |
| 411 | } | 431 | // If this sampler has already been used, return the existing mapping. |
| 432 | const auto it = std::find_if( | ||
| 433 | used_samplers.begin(), used_samplers.end(), | ||
| 434 | [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; }); | ||
| 435 | if (it != used_samplers.end()) { | ||
| 436 | ASSERT(!it->IsBindless() && it->GetType() == info.type && | ||
| 437 | it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow && | ||
| 438 | it->IsBuffer() == info.is_buffer && it->IsIndexed()); | ||
| 439 | return &*it; | ||
| 440 | } | ||
| 412 | 441 | ||
| 413 | // Otherwise create a new mapping for this sampler | 442 | uses_indexed_samplers = true; |
| 414 | const auto next_index = static_cast<u32>(used_samplers.size()); | 443 | // Otherwise create a new mapping for this sampler |
| 415 | return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, | 444 | const auto next_index = static_cast<u32>(used_samplers.size()); |
| 416 | info.is_shadow, info.is_buffer); | 445 | return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array, |
| 446 | info.is_shadow, info.is_buffer, true); | ||
| 447 | } | ||
| 448 | return nullptr; | ||
| 417 | } | 449 | } |
| 418 | 450 | ||
| 419 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | 451 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { |
| @@ -499,8 +531,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 499 | "This method is not supported."); | 531 | "This method is not supported."); |
| 500 | 532 | ||
| 501 | const SamplerInfo info{texture_type, is_array, is_shadow, false}; | 533 | const SamplerInfo info{texture_type, is_array, is_shadow, false}; |
| 502 | const Sampler* sampler = | 534 | Node index_var{}; |
| 503 | is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); | 535 | const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info) |
| 536 | : GetSampler(instr.sampler, info); | ||
| 504 | Node4 values; | 537 | Node4 values; |
| 505 | if (sampler == nullptr) { | 538 | if (sampler == nullptr) { |
| 506 | for (u32 element = 0; element < values.size(); ++element) { | 539 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -548,7 +581,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 548 | 581 | ||
| 549 | for (u32 element = 0; element < values.size(); ++element) { | 582 | for (u32 element = 0; element < values.size(); ++element) { |
| 550 | auto copy_coords = coords; | 583 | auto copy_coords = coords; |
| 551 | MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; | 584 | MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, |
| 585 | lod, {}, element, index_var}; | ||
| 552 | values[element] = Operation(read_method, meta, std::move(copy_coords)); | 586 | values[element] = Operation(read_method, meta, std::move(copy_coords)); |
| 553 | } | 587 | } |
| 554 | 588 | ||
| @@ -663,7 +697,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 663 | u64 parameter_register = instr.gpr20.Value(); | 697 | u64 parameter_register = instr.gpr20.Value(); |
| 664 | 698 | ||
| 665 | const SamplerInfo info{texture_type, is_array, depth_compare, false}; | 699 | const SamplerInfo info{texture_type, is_array, depth_compare, false}; |
| 666 | const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) | 700 | Node index_var{}; |
| 701 | const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info) | ||
| 667 | : GetSampler(instr.sampler, info); | 702 | : GetSampler(instr.sampler, info); |
| 668 | Node4 values; | 703 | Node4 values; |
| 669 | if (sampler == nullptr) { | 704 | if (sampler == nullptr) { |
| @@ -692,7 +727,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 692 | for (u32 element = 0; element < values.size(); ++element) { | 727 | for (u32 element = 0; element < values.size(); ++element) { |
| 693 | auto coords_copy = coords; | 728 | auto coords_copy = coords; |
| 694 | MetaTexture meta{ | 729 | MetaTexture meta{ |
| 695 | *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; | 730 | *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, |
| 731 | index_var}; | ||
| 696 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | 732 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 697 | } | 733 | } |
| 698 | 734 | ||
| @@ -725,7 +761,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | |||
| 725 | Node4 values; | 761 | Node4 values; |
| 726 | for (u32 element = 0; element < values.size(); ++element) { | 762 | for (u32 element = 0; element < values.size(); ++element) { |
| 727 | auto coords_copy = coords; | 763 | auto coords_copy = coords; |
| 728 | MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; | 764 | MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; |
| 729 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | 765 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |
| 730 | } | 766 | } |
| 731 | 767 | ||
| @@ -775,7 +811,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 775 | Node4 values; | 811 | Node4 values; |
| 776 | for (u32 element = 0; element < values.size(); ++element) { | 812 | for (u32 element = 0; element < values.size(); ++element) { |
| 777 | auto coords_copy = coords; | 813 | auto coords_copy = coords; |
| 778 | MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; | 814 | MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}}; |
| 779 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | 815 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |
| 780 | } | 816 | } |
| 781 | return values; | 817 | return values; |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 9af1f0228..5f83403db 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -212,6 +212,7 @@ enum class MetaStackClass { | |||
| 212 | class OperationNode; | 212 | class OperationNode; |
| 213 | class ConditionalNode; | 213 | class ConditionalNode; |
| 214 | class GprNode; | 214 | class GprNode; |
| 215 | class CustomVarNode; | ||
| 215 | class ImmediateNode; | 216 | class ImmediateNode; |
| 216 | class InternalFlagNode; | 217 | class InternalFlagNode; |
| 217 | class PredicateNode; | 218 | class PredicateNode; |
| @@ -223,26 +224,32 @@ class SmemNode; | |||
| 223 | class GmemNode; | 224 | class GmemNode; |
| 224 | class CommentNode; | 225 | class CommentNode; |
| 225 | 226 | ||
| 226 | using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, | 227 | using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, |
| 227 | InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, | 228 | InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, |
| 228 | LmemNode, SmemNode, GmemNode, CommentNode>; | 229 | LmemNode, SmemNode, GmemNode, CommentNode>; |
| 229 | using Node = std::shared_ptr<NodeData>; | 230 | using Node = std::shared_ptr<NodeData>; |
| 230 | using Node4 = std::array<Node, 4>; | 231 | using Node4 = std::array<Node, 4>; |
| 231 | using NodeBlock = std::vector<Node>; | 232 | using NodeBlock = std::vector<Node>; |
| 232 | 233 | ||
| 234 | class BindlessSamplerNode; | ||
| 235 | class ArraySamplerNode; | ||
| 236 | |||
| 237 | using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; | ||
| 238 | using TrackSampler = std::shared_ptr<TrackSamplerData>; | ||
| 239 | |||
| 233 | class Sampler { | 240 | class Sampler { |
| 234 | public: | 241 | public: |
| 235 | /// This constructor is for bound samplers | 242 | /// This constructor is for bound samplers |
| 236 | constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, | 243 | constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, |
| 237 | bool is_array, bool is_shadow, bool is_buffer) | 244 | bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) |
| 238 | : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, | 245 | : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, |
| 239 | is_buffer{is_buffer} {} | 246 | is_buffer{is_buffer}, is_indexed{is_indexed} {} |
| 240 | 247 | ||
| 241 | /// This constructor is for bindless samplers | 248 | /// This constructor is for bindless samplers |
| 242 | constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, | 249 | constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, |
| 243 | bool is_array, bool is_shadow, bool is_buffer) | 250 | bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) |
| 244 | : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, | 251 | : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, |
| 245 | is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} | 252 | is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} |
| 246 | 253 | ||
| 247 | constexpr u32 GetIndex() const { | 254 | constexpr u32 GetIndex() const { |
| 248 | return index; | 255 | return index; |
| @@ -276,16 +283,72 @@ public: | |||
| 276 | return is_bindless; | 283 | return is_bindless; |
| 277 | } | 284 | } |
| 278 | 285 | ||
| 286 | constexpr bool IsIndexed() const { | ||
| 287 | return is_indexed; | ||
| 288 | } | ||
| 289 | |||
| 290 | constexpr u32 Size() const { | ||
| 291 | return size; | ||
| 292 | } | ||
| 293 | |||
| 294 | constexpr void SetSize(u32 new_size) { | ||
| 295 | size = new_size; | ||
| 296 | } | ||
| 297 | |||
| 279 | private: | 298 | private: |
| 280 | u32 index{}; ///< Emulated index given for this sampler. | 299 | u32 index{}; ///< Emulated index given for this sampler. |
| 281 | u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. | 300 | u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. |
| 282 | u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). | 301 | u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). |
| 302 | u32 size{}; ///< Size of the sampler if indexed. | ||
| 283 | 303 | ||
| 284 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) | 304 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) |
| 285 | bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. | 305 | bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. |
| 286 | bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. | 306 | bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. |
| 287 | bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. | 307 | bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. |
| 288 | bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. | 308 | bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. |
| 309 | bool is_indexed{}; ///< Whether this sampler is an indexed array of textures. | ||
| 310 | }; | ||
| 311 | |||
| 312 | /// Represents a tracked indexed sampler: a const buffer index, a base offset and the custom variable used as index | ||
| 313 | class ArraySamplerNode final { | ||
| 314 | public: | ||
| 315 | explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) | ||
| 316 | : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} | ||
| 317 | |||
| 318 | constexpr u32 GetIndex() const { | ||
| 319 | return index; | ||
| 320 | } | ||
| 321 | |||
| 322 | constexpr u32 GetBaseOffset() const { | ||
| 323 | return base_offset; | ||
| 324 | } | ||
| 325 | |||
| 326 | constexpr u32 GetIndexVar() const { | ||
| 327 | return bindless_var; | ||
| 328 | } | ||
| 329 | |||
| 330 | private: | ||
| 331 | u32 index; | ||
| 332 | u32 base_offset; | ||
| 333 | u32 bindless_var; | ||
| 334 | }; | ||
| 335 | |||
| 336 | /// Represents a tracked bindless sampler into a direct const buffer | ||
| 337 | class BindlessSamplerNode final { | ||
| 338 | public: | ||
| 339 | explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} | ||
| 340 | |||
| 341 | constexpr u32 GetIndex() const { | ||
| 342 | return index; | ||
| 343 | } | ||
| 344 | |||
| 345 | constexpr u32 GetOffset() const { | ||
| 346 | return offset; | ||
| 347 | } | ||
| 348 | |||
| 349 | private: | ||
| 350 | u32 index; | ||
| 351 | u32 offset; | ||
| 289 | }; | 352 | }; |
| 290 | 353 | ||
| 291 | class Image final { | 354 | class Image final { |
| @@ -382,6 +445,7 @@ struct MetaTexture { | |||
| 382 | Node lod; | 445 | Node lod; |
| 383 | Node component{}; | 446 | Node component{}; |
| 384 | u32 element{}; | 447 | u32 element{}; |
| 448 | Node index{}; | ||
| 385 | }; | 449 | }; |
| 386 | 450 | ||
| 387 | struct MetaImage { | 451 | struct MetaImage { |
| @@ -488,6 +552,19 @@ private: | |||
| 488 | Tegra::Shader::Register index{}; | 552 | Tegra::Shader::Register index{}; |
| 489 | }; | 553 | }; |
| 490 | 554 | ||
| 555 | /// A custom variable | ||
| 556 | class CustomVarNode final { | ||
| 557 | public: | ||
| 558 | explicit constexpr CustomVarNode(u32 index) : index{index} {} | ||
| 559 | |||
| 560 | constexpr u32 GetIndex() const { | ||
| 561 | return index; | ||
| 562 | } | ||
| 563 | |||
| 564 | private: | ||
| 565 | u32 index{}; | ||
| 566 | }; | ||
| 567 | |||
| 491 | /// A 32-bit value that represents an immediate value | 568 | /// A 32-bit value that represents an immediate value |
| 492 | class ImmediateNode final { | 569 | class ImmediateNode final { |
| 493 | public: | 570 | public: |
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 0c2aa749b..11231bbea 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h | |||
| @@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) { | |||
| 45 | return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); | 45 | return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | template <typename T, typename... Args> | ||
| 49 | TrackSampler MakeTrackSampler(Args&&... args) { | ||
| 50 | static_assert(std::is_convertible_v<T, TrackSamplerData>); | ||
| 51 | return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); | ||
| 52 | } | ||
| 53 | |||
| 48 | template <typename... Args> | 54 | template <typename... Args> |
| 49 | Node Operation(OperationCode code, Args&&... args) { | 55 | Node Operation(OperationCode code, Args&&... args) { |
| 50 | if constexpr (sizeof...(args) == 0) { | 56 | if constexpr (sizeof...(args) == 0) { |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 31eecb3f4..3a5d280a9 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet | |||
| 27 | ConstBufferLocker& locker) | 27 | ConstBufferLocker& locker) |
| 28 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { | 28 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { |
| 29 | Decode(); | 29 | Decode(); |
| 30 | PostDecode(); | ||
| 30 | } | 31 | } |
| 31 | 32 | ||
| 32 | ShaderIR::~ShaderIR() = default; | 33 | ShaderIR::~ShaderIR() = default; |
| @@ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) { | |||
| 38 | return MakeNode<GprNode>(reg); | 39 | return MakeNode<GprNode>(reg); |
| 39 | } | 40 | } |
| 40 | 41 | ||
| 42 | Node ShaderIR::GetCustomVariable(u32 id) { | ||
| 43 | return MakeNode<CustomVarNode>(id); | ||
| 44 | } | ||
| 45 | |||
| 41 | Node ShaderIR::GetImmediate19(Instruction instr) { | 46 | Node ShaderIR::GetImmediate19(Instruction instr) { |
| 42 | return Immediate(instr.alu.GetImm20_19()); | 47 | return Immediate(instr.alu.GetImm20_19()); |
| 43 | } | 48 | } |
| @@ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) { | |||
| 452 | return id; | 457 | return id; |
| 453 | } | 458 | } |
| 454 | 459 | ||
| 460 | u32 ShaderIR::NewCustomVariable() { | ||
| 461 | return num_custom_variables++; | ||
| 462 | } | ||
| 463 | |||
| 455 | } // namespace VideoCommon::Shader | 464 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ba1db4c11..b0851c3be 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -180,6 +180,10 @@ public: | |||
| 180 | return amend_code[index]; | 180 | return amend_code[index]; |
| 181 | } | 181 | } |
| 182 | 182 | ||
| 183 | u32 GetNumCustomVariables() const { | ||
| 184 | return num_custom_variables; | ||
| 185 | } | ||
| 186 | |||
| 183 | private: | 187 | private: |
| 184 | friend class ASTDecoder; | 188 | friend class ASTDecoder; |
| 185 | 189 | ||
| @@ -191,6 +195,7 @@ private: | |||
| 191 | }; | 195 | }; |
| 192 | 196 | ||
| 193 | void Decode(); | 197 | void Decode(); |
| 198 | void PostDecode(); | ||
| 194 | 199 | ||
| 195 | NodeBlock DecodeRange(u32 begin, u32 end); | 200 | NodeBlock DecodeRange(u32 begin, u32 end); |
| 196 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | 201 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); |
| @@ -235,6 +240,8 @@ private: | |||
| 235 | 240 | ||
| 236 | /// Generates a node for a passed register. | 241 | /// Generates a node for a passed register. |
| 237 | Node GetRegister(Tegra::Shader::Register reg); | 242 | Node GetRegister(Tegra::Shader::Register reg); |
| 243 | /// Generates a node for a custom variable | ||
| 244 | Node GetCustomVariable(u32 id); | ||
| 238 | /// Generates a node representing a 19-bit immediate value | 245 | /// Generates a node representing a 19-bit immediate value |
| 239 | Node GetImmediate19(Tegra::Shader::Instruction instr); | 246 | Node GetImmediate19(Tegra::Shader::Instruction instr); |
| 240 | /// Generates a node representing a 32-bit immediate value | 247 | /// Generates a node representing a 32-bit immediate value |
| @@ -321,7 +328,7 @@ private: | |||
| 321 | std::optional<SamplerInfo> sampler_info = std::nullopt); | 328 | std::optional<SamplerInfo> sampler_info = std::nullopt); |
| 322 | 329 | ||
| 323 | /// Accesses a texture sampler for a bindless texture. | 330 | /// Accesses a texture sampler for a bindless texture. |
| 324 | const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, | 331 | const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, |
| 325 | std::optional<SamplerInfo> sampler_info = std::nullopt); | 332 | std::optional<SamplerInfo> sampler_info = std::nullopt); |
| 326 | 333 | ||
| 327 | /// Accesses an image. | 334 | /// Accesses an image. |
| @@ -387,6 +394,9 @@ private: | |||
| 387 | 394 | ||
| 388 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | 395 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 389 | 396 | ||
| 397 | std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||
| 398 | s64 cursor); | ||
| 399 | |||
| 390 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | 400 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 391 | 401 | ||
| 392 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, | 402 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, |
| @@ -399,6 +409,8 @@ private: | |||
| 399 | /// Register new amending code and obtain the reference id. | 409 | /// Register new amending code and obtain the reference id. |
| 400 | std::size_t DeclareAmend(Node new_amend); | 410 | std::size_t DeclareAmend(Node new_amend); |
| 401 | 411 | ||
| 412 | u32 NewCustomVariable(); | ||
| 413 | |||
| 402 | const ProgramCode& program_code; | 414 | const ProgramCode& program_code; |
| 403 | const u32 main_offset; | 415 | const u32 main_offset; |
| 404 | const CompilerSettings settings; | 416 | const CompilerSettings settings; |
| @@ -414,6 +426,7 @@ private: | |||
| 414 | NodeBlock global_code; | 426 | NodeBlock global_code; |
| 415 | ASTManager program_manager{true, true}; | 427 | ASTManager program_manager{true, true}; |
| 416 | std::vector<Node> amend_code; | 428 | std::vector<Node> amend_code; |
| 429 | u32 num_custom_variables{}; | ||
| 417 | 430 | ||
| 418 | std::set<u32> used_registers; | 431 | std::set<u32> used_registers; |
| 419 | std::set<Tegra::Shader::Pred> used_predicates; | 432 | std::set<Tegra::Shader::Pred> used_predicates; |
| @@ -431,6 +444,7 @@ private: | |||
| 431 | bool uses_instance_id{}; | 444 | bool uses_instance_id{}; |
| 432 | bool uses_vertex_id{}; | 445 | bool uses_vertex_id{}; |
| 433 | bool uses_warps{}; | 446 | bool uses_warps{}; |
| 447 | bool uses_indexed_samplers{}; | ||
| 434 | 448 | ||
| 435 | Tegra::Shader::Header header; | 449 | Tegra::Shader::Header header; |
| 436 | }; | 450 | }; |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 165c79330..ea39bca54 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/shader/node.h" | 10 | #include "video_core/shader/node.h" |
| 11 | #include "video_core/shader/node_helper.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | 12 | #include "video_core/shader/shader_ir.h" |
| 12 | 13 | ||
| 13 | namespace VideoCommon::Shader { | 14 | namespace VideoCommon::Shader { |
| @@ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | |||
| 35 | } | 36 | } |
| 36 | return {}; | 37 | return {}; |
| 37 | } | 38 | } |
| 39 | |||
| 40 | std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { | ||
| 41 | if (operation.GetCode() != OperationCode::UAdd) { | ||
| 42 | return std::nullopt; | ||
| 43 | } | ||
| 44 | Node gpr{}; | ||
| 45 | Node offset{}; | ||
| 46 | ASSERT(operation.GetOperandsCount() == 2); | ||
| 47 | for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { | ||
| 48 | Node operand = operation[i]; | ||
| 49 | if (std::holds_alternative<ImmediateNode>(*operand)) { | ||
| 50 | offset = operation[i]; | ||
| 51 | } else if (std::holds_alternative<GprNode>(*operand)) { | ||
| 52 | gpr = operation[i]; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | if (offset && gpr) { | ||
| 56 | return std::make_pair(gpr, offset); | ||
| 57 | } | ||
| 58 | return std::nullopt; | ||
| 59 | } | ||
| 60 | |||
| 61 | bool AmendNodeCv(std::size_t amend_index, Node node) { | ||
| 62 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 63 | operation->SetAmendIndex(amend_index); | ||
| 64 | return true; | ||
| 65 | } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 66 | conditional->SetAmendIndex(amend_index); | ||
| 67 | return true; | ||
| 68 | } | ||
| 69 | return false; | ||
| 70 | } | ||
| 71 | |||
| 38 | } // Anonymous namespace | 72 | } // Anonymous namespace |
| 39 | 73 | ||
| 74 | std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||
| 75 | s64 cursor) { | ||
| 76 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | ||
| 77 | // Constant buffer found, test if its offset is an immediate | ||
| 78 | const auto offset = cbuf->GetOffset(); | ||
| 79 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 80 | auto track = | ||
| 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); | ||
| 82 | return {tracked, track}; | ||
| 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { | ||
| 84 | auto bound_buffer = locker.ObtainBoundBuffer(); | ||
| 85 | if (!bound_buffer) { | ||
| 86 | return {}; | ||
| 87 | } | ||
| 88 | if (*bound_buffer != cbuf->GetIndex()) { | ||
| 89 | return {}; | ||
| 90 | } | ||
| 91 | auto pair = DecoupleIndirectRead(*operation); | ||
| 92 | if (!pair) { | ||
| 93 | return {}; | ||
| 94 | } | ||
| 95 | auto [gpr, base_offset] = *pair; | ||
| 96 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); | ||
| 97 | auto gpu_driver = locker.AccessGuestDriverProfile(); | ||
| 98 | if (gpu_driver == nullptr) { | ||
| 99 | return {}; | ||
| 100 | } | ||
| 101 | const u32 bindless_cv = NewCustomVariable(); | ||
| 102 | const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, | ||
| 103 | Immediate(gpu_driver->GetTextureHandlerSize())); | ||
| 104 | |||
| 105 | const Node cv_node = GetCustomVariable(bindless_cv); | ||
| 106 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); | ||
| 107 | const std::size_t amend_index = DeclareAmend(amend_op); | ||
| 108 | AmendNodeCv(amend_index, code[cursor]); | ||
| 109 | // TODO Implement Bindless Index custom variable | ||
| 110 | auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), | ||
| 111 | offset_inm->GetValue(), bindless_cv); | ||
| 112 | return {tracked, track}; | ||
| 113 | } | ||
| 114 | return {}; | ||
| 115 | } | ||
| 116 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | ||
| 117 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | ||
| 118 | return {}; | ||
| 119 | } | ||
| 120 | // Decrement the cursor by one to avoid infinite loops when the instruction writes the same | ||
| 121 | // register that it uses as an operand | ||
| 122 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | ||
| 123 | if (!source) { | ||
| 124 | return {}; | ||
| 125 | } | ||
| 126 | return TrackBindlessSampler(source, code, new_cursor); | ||
| 127 | } | ||
| 128 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | ||
| 129 | for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { | ||
| 130 | if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); | ||
| 131 | std::get<0>(found)) { | ||
| 132 | // Cbuf found in operand. | ||
| 133 | return found; | ||
| 134 | } | ||
| 135 | } | ||
| 136 | return {}; | ||
| 137 | } | ||
| 138 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | ||
| 139 | const auto& conditional_code = conditional->GetCode(); | ||
| 140 | return TrackBindlessSampler(tracked, conditional_code, | ||
| 141 | static_cast<s64>(conditional_code.size())); | ||
| 142 | } | ||
| 143 | return {}; | ||
| 144 | } | ||
| 145 | |||
| 40 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, | 146 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, |
| 41 | s64 cursor) const { | 147 | s64 cursor) const { |
| 42 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | 148 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |