diff options
| author | 2019-04-17 20:59:49 -0400 | |
|---|---|---|
| committer | 2019-04-17 20:59:49 -0400 | |
| commit | 5bd5140bdecd7139029426502b3a474a7d9c9608 (patch) | |
| tree | 97c40345bf9c9af75ff9c294601f54ab3a42a1b6 /src | |
| parent | Merge pull request #2315 from ReinUsesLisp/severity-decompiler (diff) | |
| parent | Adapt Bindless to work with AOFFI (diff) | |
| download | yuzu-5bd5140bdecd7139029426502b3a474a7d9c9608.tar.gz yuzu-5bd5140bdecd7139029426502b3a474a7d9c9608.tar.xz yuzu-5bd5140bdecd7139029426502b3a474a7d9c9608.zip | |
Merge pull request #2348 from FernandoS27/guest-bindless
Implement Bindless Textures on Shader Decompiler and GL backend
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 39 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 6 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 36 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 113 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 40 |
8 files changed, 217 insertions, 44 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 74403eed4..b198793bc 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -482,19 +482,8 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt | |||
| 482 | return textures; | 482 | return textures; |
| 483 | } | 483 | } |
| 484 | 484 | ||
| 485 | Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, | 485 | Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle, |
| 486 | std::size_t offset) const { | 486 | std::size_t offset) const { |
| 487 | auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; | ||
| 488 | auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; | ||
| 489 | ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); | ||
| 490 | |||
| 491 | const GPUVAddr tex_info_address = | ||
| 492 | tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); | ||
| 493 | |||
| 494 | ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); | ||
| 495 | |||
| 496 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | ||
| 497 | |||
| 498 | Texture::FullTextureInfo tex_info{}; | 487 | Texture::FullTextureInfo tex_info{}; |
| 499 | tex_info.index = static_cast<u32>(offset); | 488 | tex_info.index = static_cast<u32>(offset); |
| 500 | 489 | ||
| @@ -511,6 +500,22 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, | |||
| 511 | return tex_info; | 500 | return tex_info; |
| 512 | } | 501 | } |
| 513 | 502 | ||
| 503 | Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, | ||
| 504 | std::size_t offset) const { | ||
| 505 | const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; | ||
| 506 | const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; | ||
| 507 | ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); | ||
| 508 | |||
| 509 | const GPUVAddr tex_info_address = | ||
| 510 | tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); | ||
| 511 | |||
| 512 | ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); | ||
| 513 | |||
| 514 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | ||
| 515 | |||
| 516 | return GetTextureInfo(tex_handle, offset); | ||
| 517 | } | ||
| 518 | |||
| 514 | u32 Maxwell3D::GetRegisterValue(u32 method) const { | 519 | u32 Maxwell3D::GetRegisterValue(u32 method) const { |
| 515 | ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); | 520 | ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); |
| 516 | return regs.reg_array[method]; | 521 | return regs.reg_array[method]; |
| @@ -524,4 +529,12 @@ void Maxwell3D::ProcessClearBuffers() { | |||
| 524 | rasterizer.Clear(); | 529 | rasterizer.Clear(); |
| 525 | } | 530 | } |
| 526 | 531 | ||
| 532 | u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { | ||
| 533 | const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; | ||
| 534 | const auto& buffer = shader_stage.const_buffers[const_buffer]; | ||
| 535 | u32 result; | ||
| 536 | std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); | ||
| 537 | return result; | ||
| 538 | } | ||
| 539 | |||
| 527 | } // namespace Tegra::Engines | 540 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 321af3297..cc2424d38 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1131,12 +1131,18 @@ public: | |||
| 1131 | /// Write the value to the register identified by method. | 1131 | /// Write the value to the register identified by method. |
| 1132 | void CallMethod(const GPU::MethodCall& method_call); | 1132 | void CallMethod(const GPU::MethodCall& method_call); |
| 1133 | 1133 | ||
| 1134 | /// Given a Texture Handle, returns the TSC and TIC entries. | ||
| 1135 | Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, | ||
| 1136 | std::size_t offset) const; | ||
| 1137 | |||
| 1134 | /// Returns a list of enabled textures for the specified shader stage. | 1138 | /// Returns a list of enabled textures for the specified shader stage. |
| 1135 | std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; | 1139 | std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; |
| 1136 | 1140 | ||
| 1137 | /// Returns the texture information for a specific texture in a specific shader stage. | 1141 | /// Returns the texture information for a specific texture in a specific shader stage. |
| 1138 | Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; | 1142 | Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; |
| 1139 | 1143 | ||
| 1144 | u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; | ||
| 1145 | |||
| 1140 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than | 1146 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than |
| 1141 | /// we've seen used. | 1147 | /// we've seen used. |
| 1142 | using MacroMemory = std::array<u32, 0x40000>; | 1148 | using MacroMemory = std::array<u32, 0x40000>; |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 38db4addd..fce9733b9 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -987,6 +987,38 @@ union Instruction { | |||
| 987 | } tex; | 987 | } tex; |
| 988 | 988 | ||
| 989 | union { | 989 | union { |
| 990 | BitField<28, 1, u64> array; | ||
| 991 | BitField<29, 2, TextureType> texture_type; | ||
| 992 | BitField<31, 4, u64> component_mask; | ||
| 993 | BitField<49, 1, u64> nodep_flag; | ||
| 994 | BitField<50, 1, u64> dc_flag; | ||
| 995 | BitField<36, 1, u64> aoffi_flag; | ||
| 996 | BitField<37, 3, TextureProcessMode> process_mode; | ||
| 997 | |||
| 998 | bool IsComponentEnabled(std::size_t component) const { | ||
| 999 | return ((1ULL << component) & component_mask) != 0; | ||
| 1000 | } | ||
| 1001 | |||
| 1002 | TextureProcessMode GetTextureProcessMode() const { | ||
| 1003 | return process_mode; | ||
| 1004 | } | ||
| 1005 | |||
| 1006 | bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 1007 | switch (mode) { | ||
| 1008 | case TextureMiscMode::DC: | ||
| 1009 | return dc_flag != 0; | ||
| 1010 | case TextureMiscMode::NODEP: | ||
| 1011 | return nodep_flag != 0; | ||
| 1012 | case TextureMiscMode::AOFFI: | ||
| 1013 | return aoffi_flag != 0; | ||
| 1014 | default: | ||
| 1015 | break; | ||
| 1016 | } | ||
| 1017 | return false; | ||
| 1018 | } | ||
| 1019 | } tex_b; | ||
| 1020 | |||
| 1021 | union { | ||
| 990 | BitField<22, 6, TextureQueryType> query_type; | 1022 | BitField<22, 6, TextureQueryType> query_type; |
| 991 | BitField<31, 4, u64> component_mask; | 1023 | BitField<31, 4, u64> component_mask; |
| 992 | BitField<49, 1, u64> nodep_flag; | 1024 | BitField<49, 1, u64> nodep_flag; |
| @@ -1332,7 +1364,9 @@ public: | |||
| 1332 | LDG, // Load from global memory | 1364 | LDG, // Load from global memory |
| 1333 | STG, // Store in global memory | 1365 | STG, // Store in global memory |
| 1334 | TEX, | 1366 | TEX, |
| 1367 | TEX_B, // Texture Load Bindless | ||
| 1335 | TXQ, // Texture Query | 1368 | TXQ, // Texture Query |
| 1369 | TXQ_B, // Texture Query Bindless | ||
| 1336 | TEXS, // Texture Fetch with scalar/non-vec4 source/destinations | 1370 | TEXS, // Texture Fetch with scalar/non-vec4 source/destinations |
| 1337 | TLDS, // Texture Load with scalar/non-vec4 source/destinations | 1371 | TLDS, // Texture Load with scalar/non-vec4 source/destinations |
| 1338 | TLD4, // Texture Load 4 | 1372 | TLD4, // Texture Load 4 |
| @@ -1600,7 +1634,9 @@ private: | |||
| 1600 | INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), | 1634 | INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), |
| 1601 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), | 1635 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), |
| 1602 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), | 1636 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), |
| 1637 | INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), | ||
| 1603 | INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), | 1638 | INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), |
| 1639 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), | ||
| 1604 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), | 1640 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), |
| 1605 | INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), | 1641 | INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), |
| 1606 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), | 1642 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0b1fe3494..86a2e117d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -974,7 +974,15 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s | |||
| 974 | 974 | ||
| 975 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 975 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { |
| 976 | const auto& entry = entries[bindpoint]; | 976 | const auto& entry = entries[bindpoint]; |
| 977 | const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); | 977 | Tegra::Texture::FullTextureInfo texture; |
| 978 | if (entry.IsBindless()) { | ||
| 979 | const auto cbuf = entry.GetBindlessCBuf(); | ||
| 980 | Tegra::Texture::TextureHandle tex_handle; | ||
| 981 | tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); | ||
| 982 | texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); | ||
| 983 | } else { | ||
| 984 | texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); | ||
| 985 | } | ||
| 978 | const u32 current_bindpoint = base_bindings.sampler + bindpoint; | 986 | const u32 current_bindpoint = base_bindings.sampler + bindpoint; |
| 979 | 987 | ||
| 980 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); | 988 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 55b3d4d7b..74032d237 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -69,6 +69,7 @@ private: | |||
| 69 | struct ShaderEntries { | 69 | struct ShaderEntries { |
| 70 | std::vector<ConstBufferEntry> const_buffers; | 70 | std::vector<ConstBufferEntry> const_buffers; |
| 71 | std::vector<SamplerEntry> samplers; | 71 | std::vector<SamplerEntry> samplers; |
| 72 | std::vector<SamplerEntry> bindless_samplers; | ||
| 72 | std::vector<GlobalMemoryEntry> global_memory_entries; | 73 | std::vector<GlobalMemoryEntry> global_memory_entries; |
| 73 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 74 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 74 | std::size_t shader_length{}; | 75 | std::size_t shader_length{}; |
| @@ -79,4 +80,4 @@ std::string GetCommonDeclarations(); | |||
| 79 | ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, | 80 | ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, |
| 80 | const std::string& suffix); | 81 | const std::string& suffix); |
| 81 | 82 | ||
| 82 | } // namespace OpenGL::GLShader \ No newline at end of file | 83 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index d5890a375..53752b38d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -319,16 +319,19 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 319 | u32 type{}; | 319 | u32 type{}; |
| 320 | u8 is_array{}; | 320 | u8 is_array{}; |
| 321 | u8 is_shadow{}; | 321 | u8 is_shadow{}; |
| 322 | u8 is_bindless{}; | ||
| 322 | if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) || | 323 | if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) || |
| 323 | file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) || | 324 | file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) || |
| 324 | file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) || | 325 | file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) || |
| 325 | file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) || | 326 | file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) || |
| 326 | file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) { | 327 | file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) || |
| 328 | file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) { | ||
| 327 | return {}; | 329 | return {}; |
| 328 | } | 330 | } |
| 329 | entry.entries.samplers.emplace_back( | 331 | entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset), |
| 330 | static_cast<std::size_t>(offset), static_cast<std::size_t>(index), | 332 | static_cast<std::size_t>(index), |
| 331 | static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0); | 333 | static_cast<Tegra::Shader::TextureType>(type), |
| 334 | is_array != 0, is_shadow != 0, is_bindless != 0); | ||
| 332 | } | 335 | } |
| 333 | 336 | ||
| 334 | u32 global_memory_count{}; | 337 | u32 global_memory_count{}; |
| @@ -393,7 +396,8 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu | |||
| 393 | file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 || | 396 | file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 || |
| 394 | file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 || | 397 | file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 || |
| 395 | file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 || | 398 | file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 || |
| 396 | file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) { | 399 | file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 || |
| 400 | file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) { | ||
| 397 | return false; | 401 | return false; |
| 398 | } | 402 | } |
| 399 | } | 403 | } |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index a775b402b..fa65ac9a9 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -40,7 +40,7 @@ static std::size_t GetCoordCount(TextureType texture_type) { | |||
| 40 | u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | 40 | u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { |
| 41 | const Instruction instr = {program_code[pc]}; | 41 | const Instruction instr = {program_code[pc]}; |
| 42 | const auto opcode = OpCode::Decode(instr); | 42 | const auto opcode = OpCode::Decode(instr); |
| 43 | 43 | bool is_bindless = false; | |
| 44 | switch (opcode->get().GetId()) { | 44 | switch (opcode->get().GetId()) { |
| 45 | case OpCode::Id::TEX: { | 45 | case OpCode::Id::TEX: { |
| 46 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | 46 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { |
| @@ -54,7 +54,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 54 | const auto process_mode = instr.tex.GetTextureProcessMode(); | 54 | const auto process_mode = instr.tex.GetTextureProcessMode(); |
| 55 | WriteTexInstructionFloat( | 55 | WriteTexInstructionFloat( |
| 56 | bb, instr, | 56 | bb, instr, |
| 57 | GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); | 57 | GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); |
| 58 | break; | ||
| 59 | } | ||
| 60 | case OpCode::Id::TEX_B: { | ||
| 61 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 62 | "AOFFI is not implemented"); | ||
| 63 | |||
| 64 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 65 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | ||
| 66 | } | ||
| 67 | |||
| 68 | const TextureType texture_type{instr.tex_b.texture_type}; | ||
| 69 | const bool is_array = instr.tex_b.array != 0; | ||
| 70 | const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 71 | const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); | ||
| 72 | const auto process_mode = instr.tex_b.GetTextureProcessMode(); | ||
| 73 | WriteTexInstructionFloat(bb, instr, | ||
| 74 | GetTexCode(instr, texture_type, process_mode, depth_compare, | ||
| 75 | is_array, is_aoffi, {instr.gpr20})); | ||
| 58 | break; | 76 | break; |
| 59 | } | 77 | } |
| 60 | case OpCode::Id::TEXS: { | 78 | case OpCode::Id::TEXS: { |
| @@ -134,6 +152,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 134 | WriteTexsInstructionFloat(bb, instr, values); | 152 | WriteTexsInstructionFloat(bb, instr, values); |
| 135 | break; | 153 | break; |
| 136 | } | 154 | } |
| 155 | case OpCode::Id::TXQ_B: | ||
| 156 | is_bindless = true; | ||
| 157 | [[fallthrough]]; | ||
| 137 | case OpCode::Id::TXQ: { | 158 | case OpCode::Id::TXQ: { |
| 138 | if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { | 159 | if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { |
| 139 | LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); | 160 | LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); |
| @@ -143,7 +164,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 143 | // Sadly, not all texture instructions specify the type of texture their sampler | 164 | // Sadly, not all texture instructions specify the type of texture their sampler |
| 144 | // uses. This must be fixed at a later instance. | 165 | // uses. This must be fixed at a later instance. |
| 145 | const auto& sampler = | 166 | const auto& sampler = |
| 146 | GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | 167 | is_bindless |
| 168 | ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, | ||
| 169 | false) | ||
| 170 | : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 147 | 171 | ||
| 148 | u32 indexer = 0; | 172 | u32 indexer = 0; |
| 149 | switch (instr.txq.query_type) { | 173 | switch (instr.txq.query_type) { |
| @@ -154,7 +178,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 154 | } | 178 | } |
| 155 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; | 179 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; |
| 156 | const Node value = | 180 | const Node value = |
| 157 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | 181 | Operation(OperationCode::TextureQueryDimensions, meta, |
| 182 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | ||
| 158 | SetTemporal(bb, indexer++, value); | 183 | SetTemporal(bb, indexer++, value); |
| 159 | } | 184 | } |
| 160 | for (u32 i = 0; i < indexer; ++i) { | 185 | for (u32 i = 0; i < indexer; ++i) { |
| @@ -168,6 +193,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 168 | } | 193 | } |
| 169 | break; | 194 | break; |
| 170 | } | 195 | } |
| 196 | case OpCode::Id::TMML_B: | ||
| 197 | is_bindless = true; | ||
| 198 | [[fallthrough]]; | ||
| 171 | case OpCode::Id::TMML: { | 199 | case OpCode::Id::TMML: { |
| 172 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | 200 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), |
| 173 | "NDV is not implemented"); | 201 | "NDV is not implemented"); |
| @@ -178,7 +206,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 178 | 206 | ||
| 179 | auto texture_type = instr.tmml.texture_type.Value(); | 207 | auto texture_type = instr.tmml.texture_type.Value(); |
| 180 | const bool is_array = instr.tmml.array != 0; | 208 | const bool is_array = instr.tmml.array != 0; |
| 181 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | 209 | const auto& sampler = is_bindless |
| 210 | ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) | ||
| 211 | : GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 182 | 212 | ||
| 183 | std::vector<Node> coords; | 213 | std::vector<Node> coords; |
| 184 | 214 | ||
| @@ -199,17 +229,19 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 199 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | 229 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); |
| 200 | texture_type = TextureType::Texture2D; | 230 | texture_type = TextureType::Texture2D; |
| 201 | } | 231 | } |
| 202 | 232 | u32 indexer = 0; | |
| 203 | for (u32 element = 0; element < 2; ++element) { | 233 | for (u32 element = 0; element < 2; ++element) { |
| 234 | if (!instr.tmml.IsComponentEnabled(element)) { | ||
| 235 | continue; | ||
| 236 | } | ||
| 204 | auto params = coords; | 237 | auto params = coords; |
| 205 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; | 238 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; |
| 206 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | 239 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 207 | SetTemporal(bb, element, value); | 240 | SetTemporal(bb, indexer++, value); |
| 208 | } | 241 | } |
| 209 | for (u32 element = 0; element < 2; ++element) { | 242 | for (u32 i = 0; i < indexer; ++i) { |
| 210 | SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); | 243 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); |
| 211 | } | 244 | } |
| 212 | |||
| 213 | break; | 245 | break; |
| 214 | } | 246 | } |
| 215 | case OpCode::Id::TLDS: { | 247 | case OpCode::Id::TLDS: { |
| @@ -254,6 +286,34 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu | |||
| 254 | return *used_samplers.emplace(entry).first; | 286 | return *used_samplers.emplace(entry).first; |
| 255 | } | 287 | } |
| 256 | 288 | ||
| 289 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, | ||
| 290 | bool is_array, bool is_shadow) { | ||
| 291 | const Node sampler_register = GetRegister(reg); | ||
| 292 | const Node base_sampler = | ||
| 293 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | ||
| 294 | const auto cbuf = std::get_if<CbufNode>(base_sampler); | ||
| 295 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); | ||
| 296 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 297 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 298 | const auto cbuf_index = cbuf->GetIndex(); | ||
| 299 | const u64 cbuf_key = (cbuf_index << 32) | cbuf_offset; | ||
| 300 | |||
| 301 | // If this sampler has already been used, return the existing mapping. | ||
| 302 | const auto itr = | ||
| 303 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 304 | [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; }); | ||
| 305 | if (itr != used_samplers.end()) { | ||
| 306 | ASSERT(itr->GetType() == type && itr->IsArray() == is_array && | ||
| 307 | itr->IsShadow() == is_shadow); | ||
| 308 | return *itr; | ||
| 309 | } | ||
| 310 | |||
| 311 | // Otherwise create a new mapping for this sampler | ||
| 312 | const std::size_t next_index = used_samplers.size(); | ||
| 313 | const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow}; | ||
| 314 | return *used_samplers.emplace(entry).first; | ||
| 315 | } | ||
| 316 | |||
| 257 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | 317 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { |
| 258 | u32 dest_elem = 0; | 318 | u32 dest_elem = 0; |
| 259 | for (u32 elem = 0; elem < 4; ++elem) { | 319 | for (u32 elem = 0; elem < 4; ++elem) { |
| @@ -326,22 +386,27 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||
| 326 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 386 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
| 327 | TextureProcessMode process_mode, std::vector<Node> coords, | 387 | TextureProcessMode process_mode, std::vector<Node> coords, |
| 328 | Node array, Node depth_compare, u32 bias_offset, | 388 | Node array, Node depth_compare, u32 bias_offset, |
| 329 | std::vector<Node> aoffi) { | 389 | std::vector<Node> aoffi, |
| 390 | std::optional<Tegra::Shader::Register> bindless_reg) { | ||
| 330 | const bool is_array = array; | 391 | const bool is_array = array; |
| 331 | const bool is_shadow = depth_compare; | 392 | const bool is_shadow = depth_compare; |
| 393 | const bool is_bindless = bindless_reg.has_value(); | ||
| 332 | 394 | ||
| 333 | UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || | 395 | UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || |
| 334 | (texture_type == TextureType::TextureCube && is_array && is_shadow), | 396 | (texture_type == TextureType::TextureCube && is_array && is_shadow), |
| 335 | "This method is not supported."); | 397 | "This method is not supported."); |
| 336 | 398 | ||
| 337 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); | 399 | const auto& sampler = is_bindless |
| 400 | ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) | ||
| 401 | : GetSampler(instr.sampler, texture_type, is_array, is_shadow); | ||
| 338 | 402 | ||
| 339 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | 403 | const bool lod_needed = process_mode == TextureProcessMode::LZ || |
| 340 | process_mode == TextureProcessMode::LL || | 404 | process_mode == TextureProcessMode::LL || |
| 341 | process_mode == TextureProcessMode::LLA; | 405 | process_mode == TextureProcessMode::LLA; |
| 342 | 406 | ||
| 343 | // LOD selection (either via bias or explicit textureLod) not supported in GL for | 407 | // LOD selection (either via bias or explicit textureLod) not |
| 344 | // sampler2DArrayShadow and samplerCubeArrayShadow. | 408 | // supported in GL for sampler2DArrayShadow and |
| 409 | // samplerCubeArrayShadow. | ||
| 345 | const bool gl_lod_supported = | 410 | const bool gl_lod_supported = |
| 346 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || | 411 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || |
| 347 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); | 412 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); |
| @@ -359,8 +424,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 359 | lod = Immediate(0.0f); | 424 | lod = Immediate(0.0f); |
| 360 | break; | 425 | break; |
| 361 | case TextureProcessMode::LB: | 426 | case TextureProcessMode::LB: |
| 362 | // If present, lod or bias are always stored in the register indexed by the gpr20 | 427 | // If present, lod or bias are always stored in the register |
| 363 | // field with an offset depending on the usage of the other registers | 428 | // indexed by the gpr20 field with an offset depending on the |
| 429 | // usage of the other registers | ||
| 364 | bias = GetRegister(instr.gpr20.Value() + bias_offset); | 430 | bias = GetRegister(instr.gpr20.Value() + bias_offset); |
| 365 | break; | 431 | break; |
| 366 | case TextureProcessMode::LL: | 432 | case TextureProcessMode::LL: |
| @@ -384,11 +450,18 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 384 | 450 | ||
| 385 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | 451 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, |
| 386 | TextureProcessMode process_mode, bool depth_compare, bool is_array, | 452 | TextureProcessMode process_mode, bool depth_compare, bool is_array, |
| 387 | bool is_aoffi) { | 453 | bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { |
| 388 | const bool lod_bias_enabled{ | 454 | const bool lod_bias_enabled{ |
| 389 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; | 455 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; |
| 390 | 456 | ||
| 457 | const bool is_bindless = bindless_reg.has_value(); | ||
| 458 | |||
| 391 | u64 parameter_register = instr.gpr20.Value(); | 459 | u64 parameter_register = instr.gpr20.Value(); |
| 460 | if (is_bindless) { | ||
| 461 | ++parameter_register; | ||
| 462 | } | ||
| 463 | |||
| 464 | const u32 bias_lod_offset = (is_bindless ? 1 : 0); | ||
| 392 | if (lod_bias_enabled) { | 465 | if (lod_bias_enabled) { |
| 393 | ++parameter_register; | 466 | ++parameter_register; |
| 394 | } | 467 | } |
| @@ -423,7 +496,8 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | |||
| 423 | dc = GetRegister(parameter_register++); | 496 | dc = GetRegister(parameter_register++); |
| 424 | } | 497 | } |
| 425 | 498 | ||
| 426 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi); | 499 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, |
| 500 | aoffi, bindless_reg); | ||
| 427 | } | 501 | } |
| 428 | 502 | ||
| 429 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | 503 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, |
| @@ -459,7 +533,8 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 459 | dc = GetRegister(depth_register); | 533 | dc = GetRegister(depth_register); |
| 460 | } | 534 | } |
| 461 | 535 | ||
| 462 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}); | 536 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, |
| 537 | {}); | ||
| 463 | } | 538 | } |
| 464 | 539 | ||
| 465 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | 540 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 1afab08c0..57af8b10f 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -196,9 +196,23 @@ enum class ExitMethod { | |||
| 196 | 196 | ||
| 197 | class Sampler { | 197 | class Sampler { |
| 198 | public: | 198 | public: |
| 199 | // Use this constructor for bounded Samplers | ||
| 199 | explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, | 200 | explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, |
| 200 | bool is_array, bool is_shadow) | 201 | bool is_array, bool is_shadow) |
| 201 | : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} | 202 | : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, |
| 203 | is_bindless{false} {} | ||
| 204 | |||
| 205 | // Use this constructor for bindless Samplers | ||
| 206 | explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, | ||
| 207 | Tegra::Shader::TextureType type, bool is_array, bool is_shadow) | ||
| 208 | : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, | ||
| 209 | is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} | ||
| 210 | |||
| 211 | // Use this only for serialization/deserialization | ||
| 212 | explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, | ||
| 213 | bool is_array, bool is_shadow, bool is_bindless) | ||
| 214 | : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, | ||
| 215 | is_bindless{is_bindless} {} | ||
| 202 | 216 | ||
| 203 | std::size_t GetOffset() const { | 217 | std::size_t GetOffset() const { |
| 204 | return offset; | 218 | return offset; |
| @@ -220,6 +234,14 @@ public: | |||
| 220 | return is_shadow; | 234 | return is_shadow; |
| 221 | } | 235 | } |
| 222 | 236 | ||
| 237 | bool IsBindless() const { | ||
| 238 | return is_bindless; | ||
| 239 | } | ||
| 240 | |||
| 241 | std::pair<u32, u32> GetBindlessCBuf() const { | ||
| 242 | return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; | ||
| 243 | } | ||
| 244 | |||
| 223 | bool operator<(const Sampler& rhs) const { | 245 | bool operator<(const Sampler& rhs) const { |
| 224 | return std::tie(offset, index, type, is_array, is_shadow) < | 246 | return std::tie(offset, index, type, is_array, is_shadow) < |
| 225 | std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow); | 247 | std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow); |
| @@ -231,8 +253,9 @@ private: | |||
| 231 | std::size_t offset{}; | 253 | std::size_t offset{}; |
| 232 | std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. | 254 | std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. |
| 233 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) | 255 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) |
| 234 | bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. | 256 | bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. |
| 235 | bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. | 257 | bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. |
| 258 | bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. | ||
| 236 | }; | 259 | }; |
| 237 | 260 | ||
| 238 | class ConstBuffer { | 261 | class ConstBuffer { |
| @@ -735,6 +758,11 @@ private: | |||
| 735 | const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, | 758 | const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, |
| 736 | Tegra::Shader::TextureType type, bool is_array, bool is_shadow); | 759 | Tegra::Shader::TextureType type, bool is_array, bool is_shadow); |
| 737 | 760 | ||
| 761 | // Accesses a texture sampler for a bindless texture. | ||
| 762 | const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, | ||
| 763 | Tegra::Shader::TextureType type, bool is_array, | ||
| 764 | bool is_shadow); | ||
| 765 | |||
| 738 | /// Extracts a sequence of bits from a node | 766 | /// Extracts a sequence of bits from a node |
| 739 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | 767 | Node BitfieldExtract(Node value, u32 offset, u32 bits); |
| 740 | 768 | ||
| @@ -748,7 +776,8 @@ private: | |||
| 748 | 776 | ||
| 749 | Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 777 | Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 750 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 778 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
| 751 | bool is_array, bool is_aoffi); | 779 | bool is_array, bool is_aoffi, |
| 780 | std::optional<Tegra::Shader::Register> bindless_reg); | ||
| 752 | 781 | ||
| 753 | Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 782 | Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 754 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 783 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
| @@ -768,7 +797,8 @@ private: | |||
| 768 | 797 | ||
| 769 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 798 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 770 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, | 799 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, |
| 771 | Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi); | 800 | Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, |
| 801 | std::optional<Tegra::Shader::Register> bindless_reg); | ||
| 772 | 802 | ||
| 773 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | 803 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, |
| 774 | u64 byte_height); | 804 | u64 byte_height); |