diff options
| -rw-r--r-- | src/video_core/CMakeLists.txt | 4 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 34 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h (renamed from src/video_core/engines/maxwell_compute.h) | 31 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_compute.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 2 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 97 | ||||
| -rw-r--r-- | src/video_core/shader/decode/arithmetic_integer.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/shader/decode/conversion.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 157 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 22 |
12 files changed, 188 insertions, 211 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 33e507e69..1db0d031d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -5,12 +5,12 @@ add_library(video_core STATIC | |||
| 5 | debug_utils/debug_utils.h | 5 | debug_utils/debug_utils.h |
| 6 | engines/fermi_2d.cpp | 6 | engines/fermi_2d.cpp |
| 7 | engines/fermi_2d.h | 7 | engines/fermi_2d.h |
| 8 | engines/kepler_compute.cpp | ||
| 9 | engines/kepler_compute.h | ||
| 8 | engines/kepler_memory.cpp | 10 | engines/kepler_memory.cpp |
| 9 | engines/kepler_memory.h | 11 | engines/kepler_memory.h |
| 10 | engines/maxwell_3d.cpp | 12 | engines/maxwell_3d.cpp |
| 11 | engines/maxwell_3d.h | 13 | engines/maxwell_3d.h |
| 12 | engines/maxwell_compute.cpp | ||
| 13 | engines/maxwell_compute.h | ||
| 14 | engines/maxwell_dma.cpp | 14 | engines/maxwell_dma.cpp |
| 15 | engines/maxwell_dma.h | 15 | engines/maxwell_dma.h |
| 16 | engines/shader_bytecode.h | 16 | engines/shader_bytecode.h |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp new file mode 100644 index 000000000..4ca856b6b --- /dev/null +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/core.h" | ||
| 7 | #include "core/memory.h" | ||
| 8 | #include "video_core/engines/kepler_compute.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | |||
| 11 | namespace Tegra::Engines { | ||
| 12 | |||
| 13 | KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} | ||
| 14 | |||
| 15 | KeplerCompute::~KeplerCompute() = default; | ||
| 16 | |||
| 17 | void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | ||
| 18 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | ||
| 19 | "Invalid KeplerCompute register, increase the size of the Regs structure"); | ||
| 20 | |||
| 21 | regs.reg_array[method_call.method] = method_call.argument; | ||
| 22 | |||
| 23 | switch (method_call.method) { | ||
| 24 | case KEPLER_COMPUTE_REG_INDEX(launch): | ||
| 25 | // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA | ||
| 26 | // kernels) | ||
| 27 | UNREACHABLE_MSG("Compute shaders are not implemented"); | ||
| 28 | break; | ||
| 29 | default: | ||
| 30 | break; | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | } // namespace Tegra::Engines | ||
diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/kepler_compute.h index 1d71f11bd..df0a32e0f 100644 --- a/src/video_core/engines/maxwell_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -10,47 +10,48 @@ | |||
| 10 | #include "common/common_funcs.h" | 10 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/gpu.h" | 12 | #include "video_core/gpu.h" |
| 13 | #include "video_core/memory_manager.h" | ||
| 13 | 14 | ||
| 14 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 15 | 16 | ||
| 16 | #define MAXWELL_COMPUTE_REG_INDEX(field_name) \ | 17 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ |
| 17 | (offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32)) | 18 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |
| 18 | 19 | ||
| 19 | class MaxwellCompute final { | 20 | class KeplerCompute final { |
| 20 | public: | 21 | public: |
| 21 | MaxwellCompute() = default; | 22 | explicit KeplerCompute(MemoryManager& memory_manager); |
| 22 | ~MaxwellCompute() = default; | 23 | ~KeplerCompute(); |
| 24 | |||
| 25 | static constexpr std::size_t NumConstBuffers = 8; | ||
| 23 | 26 | ||
| 24 | struct Regs { | 27 | struct Regs { |
| 25 | static constexpr std::size_t NUM_REGS = 0xCF8; | 28 | static constexpr std::size_t NUM_REGS = 0xCF8; |
| 26 | 29 | ||
| 27 | union { | 30 | union { |
| 28 | struct { | 31 | struct { |
| 29 | INSERT_PADDING_WORDS(0x281); | 32 | INSERT_PADDING_WORDS(0xAF); |
| 30 | 33 | ||
| 31 | union { | 34 | u32 launch; |
| 32 | u32 compute_end; | ||
| 33 | BitField<0, 1, u32> unknown; | ||
| 34 | } compute; | ||
| 35 | 35 | ||
| 36 | INSERT_PADDING_WORDS(0xA76); | 36 | INSERT_PADDING_WORDS(0xC48); |
| 37 | }; | 37 | }; |
| 38 | std::array<u32, NUM_REGS> reg_array; | 38 | std::array<u32, NUM_REGS> reg_array; |
| 39 | }; | 39 | }; |
| 40 | } regs{}; | 40 | } regs{}; |
| 41 | |||
| 42 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), | 41 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), |
| 43 | "MaxwellCompute Regs has wrong size"); | 42 | "KeplerCompute Regs has wrong size"); |
| 43 | |||
| 44 | MemoryManager& memory_manager; | ||
| 44 | 45 | ||
| 45 | /// Write the value to the register identified by method. | 46 | /// Write the value to the register identified by method. |
| 46 | void CallMethod(const GPU::MethodCall& method_call); | 47 | void CallMethod(const GPU::MethodCall& method_call); |
| 47 | }; | 48 | }; |
| 48 | 49 | ||
| 49 | #define ASSERT_REG_POSITION(field_name, position) \ | 50 | #define ASSERT_REG_POSITION(field_name, position) \ |
| 50 | static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4, \ | 51 | static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ |
| 51 | "Field " #field_name " has invalid position") | 52 | "Field " #field_name " has invalid position") |
| 52 | 53 | ||
| 53 | ASSERT_REG_POSITION(compute, 0x281); | 54 | ASSERT_REG_POSITION(launch, 0xAF); |
| 54 | 55 | ||
| 55 | #undef ASSERT_REG_POSITION | 56 | #undef ASSERT_REG_POSITION |
| 56 | 57 | ||
diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp deleted file mode 100644 index 656db6a61..000000000 --- a/src/video_core/engines/maxwell_compute.cpp +++ /dev/null | |||
| @@ -1,28 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/core.h" | ||
| 7 | #include "video_core/engines/maxwell_compute.h" | ||
| 8 | |||
| 9 | namespace Tegra::Engines { | ||
| 10 | |||
| 11 | void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) { | ||
| 12 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | ||
| 13 | "Invalid MaxwellCompute register, increase the size of the Regs structure"); | ||
| 14 | |||
| 15 | regs.reg_array[method_call.method] = method_call.argument; | ||
| 16 | |||
| 17 | switch (method_call.method) { | ||
| 18 | case MAXWELL_COMPUTE_REG_INDEX(compute): { | ||
| 19 | LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented"); | ||
| 20 | UNREACHABLE(); | ||
| 21 | break; | ||
| 22 | } | ||
| 23 | default: | ||
| 24 | break; | ||
| 25 | } | ||
| 26 | } | ||
| 27 | |||
| 28 | } // namespace Tegra::Engines | ||
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 269df9437..1f425f90b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -186,7 +186,7 @@ enum class SubOp : u64 { | |||
| 186 | }; | 186 | }; |
| 187 | 187 | ||
| 188 | enum class F2iRoundingOp : u64 { | 188 | enum class F2iRoundingOp : u64 { |
| 189 | None = 0, | 189 | RoundEven = 0, |
| 190 | Floor = 1, | 190 | Floor = 1, |
| 191 | Ceil = 2, | 191 | Ceil = 2, |
| 192 | Trunc = 3, | 192 | Trunc = 3, |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index d2ba1103e..018363f95 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -6,9 +6,9 @@ | |||
| 6 | #include "core/core_timing.h" | 6 | #include "core/core_timing.h" |
| 7 | #include "core/memory.h" | 7 | #include "core/memory.h" |
| 8 | #include "video_core/engines/fermi_2d.h" | 8 | #include "video_core/engines/fermi_2d.h" |
| 9 | #include "video_core/engines/kepler_compute.h" | ||
| 9 | #include "video_core/engines/kepler_memory.h" | 10 | #include "video_core/engines/kepler_memory.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | 11 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/engines/maxwell_compute.h" | ||
| 12 | #include "video_core/engines/maxwell_dma.h" | 12 | #include "video_core/engines/maxwell_dma.h" |
| 13 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| 14 | #include "video_core/rasterizer_interface.h" | 14 | #include "video_core/rasterizer_interface.h" |
| @@ -31,7 +31,7 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { | |||
| 31 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 31 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 32 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); | 32 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); |
| 33 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | 33 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); |
| 34 | maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); | 34 | kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); |
| 35 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager); | 35 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager); |
| 36 | kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager); | 36 | kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager); |
| 37 | } | 37 | } |
| @@ -245,8 +245,8 @@ void GPU::CallEngineMethod(const MethodCall& method_call) { | |||
| 245 | case EngineID::MAXWELL_B: | 245 | case EngineID::MAXWELL_B: |
| 246 | maxwell_3d->CallMethod(method_call); | 246 | maxwell_3d->CallMethod(method_call); |
| 247 | break; | 247 | break; |
| 248 | case EngineID::MAXWELL_COMPUTE_B: | 248 | case EngineID::KEPLER_COMPUTE_B: |
| 249 | maxwell_compute->CallMethod(method_call); | 249 | kepler_compute->CallMethod(method_call); |
| 250 | break; | 250 | break; |
| 251 | case EngineID::MAXWELL_DMA_COPY_A: | 251 | case EngineID::MAXWELL_DMA_COPY_A: |
| 252 | maxwell_dma->CallMethod(method_call); | 252 | maxwell_dma->CallMethod(method_call); |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fb8975811..21d82e426 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -102,15 +102,15 @@ struct FramebufferConfig { | |||
| 102 | namespace Engines { | 102 | namespace Engines { |
| 103 | class Fermi2D; | 103 | class Fermi2D; |
| 104 | class Maxwell3D; | 104 | class Maxwell3D; |
| 105 | class MaxwellCompute; | ||
| 106 | class MaxwellDMA; | 105 | class MaxwellDMA; |
| 106 | class KeplerCompute; | ||
| 107 | class KeplerMemory; | 107 | class KeplerMemory; |
| 108 | } // namespace Engines | 108 | } // namespace Engines |
| 109 | 109 | ||
| 110 | enum class EngineID { | 110 | enum class EngineID { |
| 111 | FERMI_TWOD_A = 0x902D, // 2D Engine | 111 | FERMI_TWOD_A = 0x902D, // 2D Engine |
| 112 | MAXWELL_B = 0xB197, // 3D Engine | 112 | MAXWELL_B = 0xB197, // 3D Engine |
| 113 | MAXWELL_COMPUTE_B = 0xB1C0, | 113 | KEPLER_COMPUTE_B = 0xB1C0, |
| 114 | KEPLER_INLINE_TO_MEMORY_B = 0xA140, | 114 | KEPLER_INLINE_TO_MEMORY_B = 0xA140, |
| 115 | MAXWELL_DMA_COPY_A = 0xB0B5, | 115 | MAXWELL_DMA_COPY_A = 0xB0B5, |
| 116 | }; | 116 | }; |
| @@ -208,7 +208,7 @@ private: | |||
| 208 | /// 2D engine | 208 | /// 2D engine |
| 209 | std::unique_ptr<Engines::Fermi2D> fermi_2d; | 209 | std::unique_ptr<Engines::Fermi2D> fermi_2d; |
| 210 | /// Compute engine | 210 | /// Compute engine |
| 211 | std::unique_ptr<Engines::MaxwellCompute> maxwell_compute; | 211 | std::unique_ptr<Engines::KeplerCompute> kepler_compute; |
| 212 | /// DMA engine | 212 | /// DMA engine |
| 213 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | 213 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |
| 214 | /// Inline memory engine | 214 | /// Inline memory engine |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 70e124dc4..b39bb4843 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -719,45 +719,51 @@ private: | |||
| 719 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; | 719 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; |
| 720 | 720 | ||
| 721 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 721 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 722 | const auto count = static_cast<u32>(operation.GetOperandsCount()); | ||
| 723 | ASSERT(meta); | 722 | ASSERT(meta); |
| 724 | 723 | ||
| 724 | const auto count = static_cast<u32>(operation.GetOperandsCount()); | ||
| 725 | const bool has_array = meta->sampler.IsArray(); | ||
| 726 | const bool has_shadow = meta->sampler.IsShadow(); | ||
| 727 | |||
| 725 | std::string expr = func; | 728 | std::string expr = func; |
| 726 | expr += '('; | 729 | expr += '('; |
| 727 | expr += GetSampler(meta->sampler); | 730 | expr += GetSampler(meta->sampler); |
| 728 | expr += ", "; | 731 | expr += ", "; |
| 729 | 732 | ||
| 730 | expr += coord_constructors[meta->coords_count - 1]; | 733 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); |
| 731 | expr += '('; | 734 | expr += '('; |
| 732 | for (u32 i = 0; i < count; ++i) { | 735 | for (u32 i = 0; i < count; ++i) { |
| 733 | const bool is_extra = i >= meta->coords_count; | 736 | expr += Visit(operation[i]); |
| 734 | const bool is_array = i == meta->array_index; | ||
| 735 | |||
| 736 | std::string operand = [&]() { | ||
| 737 | if (is_extra && is_extra_int) { | ||
| 738 | if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) { | ||
| 739 | return std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 740 | } else { | ||
| 741 | return "ftoi(" + Visit(operation[i]) + ')'; | ||
| 742 | } | ||
| 743 | } else { | ||
| 744 | return Visit(operation[i]); | ||
| 745 | } | ||
| 746 | }(); | ||
| 747 | if (is_array) { | ||
| 748 | ASSERT(!is_extra); | ||
| 749 | operand = "float(ftoi(" + operand + "))"; | ||
| 750 | } | ||
| 751 | |||
| 752 | expr += operand; | ||
| 753 | 737 | ||
| 754 | if (i + 1 == meta->coords_count) { | 738 | const u32 next = i + 1; |
| 755 | expr += ')'; | 739 | if (next < count || has_array || has_shadow) |
| 756 | } | 740 | expr += ", "; |
| 757 | if (i + 1 < count) { | 741 | } |
| 742 | if (has_array) { | ||
| 743 | expr += "float(ftoi(" + Visit(meta->array) + "))"; | ||
| 744 | } | ||
| 745 | if (has_shadow) { | ||
| 746 | if (has_array) | ||
| 758 | expr += ", "; | 747 | expr += ", "; |
| 748 | expr += Visit(meta->depth_compare); | ||
| 749 | } | ||
| 750 | expr += ')'; | ||
| 751 | |||
| 752 | for (const Node extra : meta->extras) { | ||
| 753 | expr += ", "; | ||
| 754 | if (is_extra_int) { | ||
| 755 | if (const auto immediate = std::get_if<ImmediateNode>(extra)) { | ||
| 756 | // Inline the string as an immediate integer in GLSL (some extra arguments are | ||
| 757 | // required to be constant) | ||
| 758 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 759 | } else { | ||
| 760 | expr += "ftoi(" + Visit(extra) + ')'; | ||
| 761 | } | ||
| 762 | } else { | ||
| 763 | expr += Visit(extra); | ||
| 759 | } | 764 | } |
| 760 | } | 765 | } |
| 766 | |||
| 761 | expr += ')'; | 767 | expr += ')'; |
| 762 | return expr; | 768 | return expr; |
| 763 | } | 769 | } |
| @@ -1134,7 +1140,7 @@ private: | |||
| 1134 | Type::HalfFloat); | 1140 | Type::HalfFloat); |
| 1135 | } | 1141 | } |
| 1136 | 1142 | ||
| 1137 | std::string F4Texture(Operation operation) { | 1143 | std::string Texture(Operation operation) { |
| 1138 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1144 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1139 | ASSERT(meta); | 1145 | ASSERT(meta); |
| 1140 | 1146 | ||
| @@ -1145,7 +1151,7 @@ private: | |||
| 1145 | return expr + GetSwizzle(meta->element); | 1151 | return expr + GetSwizzle(meta->element); |
| 1146 | } | 1152 | } |
| 1147 | 1153 | ||
| 1148 | std::string F4TextureLod(Operation operation) { | 1154 | std::string TextureLod(Operation operation) { |
| 1149 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1155 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1150 | ASSERT(meta); | 1156 | ASSERT(meta); |
| 1151 | 1157 | ||
| @@ -1156,7 +1162,7 @@ private: | |||
| 1156 | return expr + GetSwizzle(meta->element); | 1162 | return expr + GetSwizzle(meta->element); |
| 1157 | } | 1163 | } |
| 1158 | 1164 | ||
| 1159 | std::string F4TextureGather(Operation operation) { | 1165 | std::string TextureGather(Operation operation) { |
| 1160 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1166 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1161 | ASSERT(meta); | 1167 | ASSERT(meta); |
| 1162 | 1168 | ||
| @@ -1164,7 +1170,7 @@ private: | |||
| 1164 | GetSwizzle(meta->element); | 1170 | GetSwizzle(meta->element); |
| 1165 | } | 1171 | } |
| 1166 | 1172 | ||
| 1167 | std::string F4TextureQueryDimensions(Operation operation) { | 1173 | std::string TextureQueryDimensions(Operation operation) { |
| 1168 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1174 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1169 | ASSERT(meta); | 1175 | ASSERT(meta); |
| 1170 | 1176 | ||
| @@ -1184,7 +1190,7 @@ private: | |||
| 1184 | return "0"; | 1190 | return "0"; |
| 1185 | } | 1191 | } |
| 1186 | 1192 | ||
| 1187 | std::string F4TextureQueryLod(Operation operation) { | 1193 | std::string TextureQueryLod(Operation operation) { |
| 1188 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1194 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1189 | ASSERT(meta); | 1195 | ASSERT(meta); |
| 1190 | 1196 | ||
| @@ -1195,29 +1201,32 @@ private: | |||
| 1195 | return "0"; | 1201 | return "0"; |
| 1196 | } | 1202 | } |
| 1197 | 1203 | ||
| 1198 | std::string F4TexelFetch(Operation operation) { | 1204 | std::string TexelFetch(Operation operation) { |
| 1199 | constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; | 1205 | constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; |
| 1200 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1206 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1201 | const auto count = static_cast<u32>(operation.GetOperandsCount()); | ||
| 1202 | ASSERT(meta); | 1207 | ASSERT(meta); |
| 1208 | UNIMPLEMENTED_IF(meta->sampler.IsArray()); | ||
| 1209 | UNIMPLEMENTED_IF(!meta->extras.empty()); | ||
| 1210 | |||
| 1211 | const auto count = static_cast<u32>(operation.GetOperandsCount()); | ||
| 1203 | 1212 | ||
| 1204 | std::string expr = "texelFetch("; | 1213 | std::string expr = "texelFetch("; |
| 1205 | expr += GetSampler(meta->sampler); | 1214 | expr += GetSampler(meta->sampler); |
| 1206 | expr += ", "; | 1215 | expr += ", "; |
| 1207 | 1216 | ||
| 1208 | expr += constructors[meta->coords_count - 1]; | 1217 | expr += constructors.at(count - 1); |
| 1209 | expr += '('; | 1218 | expr += '('; |
| 1210 | for (u32 i = 0; i < count; ++i) { | 1219 | for (u32 i = 0; i < count; ++i) { |
| 1211 | expr += VisitOperand(operation, i, Type::Int); | 1220 | expr += VisitOperand(operation, i, Type::Int); |
| 1212 | 1221 | ||
| 1213 | if (i + 1 == meta->coords_count) { | 1222 | const u32 next = i + 1; |
| 1223 | if (next == count) | ||
| 1214 | expr += ')'; | 1224 | expr += ')'; |
| 1215 | } | 1225 | if (next < count) |
| 1216 | if (i + 1 < count) { | ||
| 1217 | expr += ", "; | 1226 | expr += ", "; |
| 1218 | } | ||
| 1219 | } | 1227 | } |
| 1220 | expr += ')'; | 1228 | expr += ')'; |
| 1229 | |||
| 1221 | return expr + GetSwizzle(meta->element); | 1230 | return expr + GetSwizzle(meta->element); |
| 1222 | } | 1231 | } |
| 1223 | 1232 | ||
| @@ -1454,12 +1463,12 @@ private: | |||
| 1454 | &GLSLDecompiler::Logical2HNotEqual, | 1463 | &GLSLDecompiler::Logical2HNotEqual, |
| 1455 | &GLSLDecompiler::Logical2HGreaterEqual, | 1464 | &GLSLDecompiler::Logical2HGreaterEqual, |
| 1456 | 1465 | ||
| 1457 | &GLSLDecompiler::F4Texture, | 1466 | &GLSLDecompiler::Texture, |
| 1458 | &GLSLDecompiler::F4TextureLod, | 1467 | &GLSLDecompiler::TextureLod, |
| 1459 | &GLSLDecompiler::F4TextureGather, | 1468 | &GLSLDecompiler::TextureGather, |
| 1460 | &GLSLDecompiler::F4TextureQueryDimensions, | 1469 | &GLSLDecompiler::TextureQueryDimensions, |
| 1461 | &GLSLDecompiler::F4TextureQueryLod, | 1470 | &GLSLDecompiler::TextureQueryLod, |
| 1462 | &GLSLDecompiler::F4TexelFetch, | 1471 | &GLSLDecompiler::TexelFetch, |
| 1463 | 1472 | ||
| 1464 | &GLSLDecompiler::Branch, | 1473 | &GLSLDecompiler::Branch, |
| 1465 | &GLSLDecompiler::PushFlowStack, | 1474 | &GLSLDecompiler::PushFlowStack, |
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 38bb692d6..9fd4b273e 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp | |||
| @@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { | |||
| 41 | 41 | ||
| 42 | const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); | 42 | const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); |
| 43 | 43 | ||
| 44 | SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); | 44 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |
| 45 | SetRegister(bb, instr.gpr0, value); | 45 | SetRegister(bb, instr.gpr0, value); |
| 46 | break; | 46 | break; |
| 47 | } | 47 | } |
| @@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod | |||
| 284 | SetRegister(bb, dest, value); | 284 | SetRegister(bb, dest, value); |
| 285 | } | 285 | } |
| 286 | 286 | ||
| 287 | } // namespace VideoCommon::Shader \ No newline at end of file | 287 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index a992f73f8..55a6fbbf2 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 118 | 118 | ||
| 119 | value = [&]() { | 119 | value = [&]() { |
| 120 | switch (instr.conversion.f2i.rounding) { | 120 | switch (instr.conversion.f2i.rounding) { |
| 121 | case Tegra::Shader::F2iRoundingOp::None: | 121 | case Tegra::Shader::F2iRoundingOp::RoundEven: |
| 122 | return value; | 122 | return Operation(OperationCode::FRoundEven, PRECISE, value); |
| 123 | case Tegra::Shader::F2iRoundingOp::Floor: | 123 | case Tegra::Shader::F2iRoundingOp::Floor: |
| 124 | return Operation(OperationCode::FFloor, PRECISE, value); | 124 | return Operation(OperationCode::FFloor, PRECISE, value); |
| 125 | case Tegra::Shader::F2iRoundingOp::Ceil: | 125 | case Tegra::Shader::F2iRoundingOp::Ceil: |
| @@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 146 | return pc; | 146 | return pc; |
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | } // namespace VideoCommon::Shader \ No newline at end of file | 149 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index e006f8138..523421794 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -306,7 +306,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 306 | case OpCode::Id::TLD4S: { | 306 | case OpCode::Id::TLD4S: { |
| 307 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), | 307 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), |
| 308 | "AOFFI is not implemented"); | 308 | "AOFFI is not implemented"); |
| 309 | |||
| 310 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { | 309 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { |
| 311 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); | 310 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); |
| 312 | } | 311 | } |
| @@ -315,9 +314,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 315 | const Node op_a = GetRegister(instr.gpr8); | 314 | const Node op_a = GetRegister(instr.gpr8); |
| 316 | const Node op_b = GetRegister(instr.gpr20); | 315 | const Node op_b = GetRegister(instr.gpr20); |
| 317 | 316 | ||
| 318 | std::vector<Node> coords; | ||
| 319 | |||
| 320 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | 317 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. |
| 318 | std::vector<Node> coords; | ||
| 321 | if (depth_compare) { | 319 | if (depth_compare) { |
| 322 | // Note: TLD4S coordinate encoding works just like TEXS's | 320 | // Note: TLD4S coordinate encoding works just like TEXS's |
| 323 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | 321 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); |
| @@ -328,18 +326,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 328 | coords.push_back(op_a); | 326 | coords.push_back(op_a); |
| 329 | coords.push_back(op_b); | 327 | coords.push_back(op_b); |
| 330 | } | 328 | } |
| 331 | const auto num_coords = static_cast<u32>(coords.size()); | 329 | std::vector<Node> extras; |
| 332 | coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); | 330 | extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); |
| 333 | 331 | ||
| 334 | const auto& sampler = | 332 | const auto& sampler = |
| 335 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | 333 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); |
| 336 | 334 | ||
| 337 | Node4 values; | 335 | Node4 values; |
| 338 | for (u32 element = 0; element < values.size(); ++element) { | 336 | for (u32 element = 0; element < values.size(); ++element) { |
| 339 | auto params = coords; | 337 | auto coords_copy = coords; |
| 340 | MetaTexture meta{sampler, element, num_coords}; | 338 | MetaTexture meta{sampler, {}, {}, extras, element}; |
| 341 | values[element] = | 339 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 342 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 343 | } | 340 | } |
| 344 | 341 | ||
| 345 | WriteTexsInstructionFloat(bb, instr, values); | 342 | WriteTexsInstructionFloat(bb, instr, values); |
| @@ -360,12 +357,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 360 | switch (instr.txq.query_type) { | 357 | switch (instr.txq.query_type) { |
| 361 | case Tegra::Shader::TextureQueryType::Dimension: { | 358 | case Tegra::Shader::TextureQueryType::Dimension: { |
| 362 | for (u32 element = 0; element < 4; ++element) { | 359 | for (u32 element = 0; element < 4; ++element) { |
| 363 | if (instr.txq.IsComponentEnabled(element)) { | 360 | if (!instr.txq.IsComponentEnabled(element)) { |
| 364 | MetaTexture meta{sampler, element}; | 361 | continue; |
| 365 | const Node value = Operation(OperationCode::F4TextureQueryDimensions, | ||
| 366 | std::move(meta), GetRegister(instr.gpr8)); | ||
| 367 | SetTemporal(bb, indexer++, value); | ||
| 368 | } | 362 | } |
| 363 | MetaTexture meta{sampler, {}, {}, {}, element}; | ||
| 364 | const Node value = | ||
| 365 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | ||
| 366 | SetTemporal(bb, indexer++, value); | ||
| 369 | } | 367 | } |
| 370 | for (u32 i = 0; i < indexer; ++i) { | 368 | for (u32 i = 0; i < indexer; ++i) { |
| 371 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 369 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); |
| @@ -412,9 +410,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 412 | 410 | ||
| 413 | for (u32 element = 0; element < 2; ++element) { | 411 | for (u32 element = 0; element < 2; ++element) { |
| 414 | auto params = coords; | 412 | auto params = coords; |
| 415 | MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())}; | 413 | MetaTexture meta{sampler, {}, {}, {}, element}; |
| 416 | const Node value = | 414 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 417 | Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); | ||
| 418 | SetTemporal(bb, element, value); | 415 | SetTemporal(bb, element, value); |
| 419 | } | 416 | } |
| 420 | for (u32 element = 0; element < 2; ++element) { | 417 | for (u32 element = 0; element < 2; ++element) { |
| @@ -535,15 +532,16 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||
| 535 | } | 532 | } |
| 536 | 533 | ||
| 537 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 534 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
| 538 | TextureProcessMode process_mode, bool depth_compare, bool is_array, | 535 | TextureProcessMode process_mode, std::vector<Node> coords, |
| 539 | std::size_t array_offset, std::size_t bias_offset, | 536 | Node array, Node depth_compare, u32 bias_offset) { |
| 540 | std::vector<Node>&& coords) { | 537 | const bool is_array = array; |
| 541 | UNIMPLEMENTED_IF_MSG( | 538 | const bool is_shadow = depth_compare; |
| 542 | (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || | ||
| 543 | (texture_type == TextureType::TextureCube && is_array && depth_compare), | ||
| 544 | "This method is not supported."); | ||
| 545 | 539 | ||
| 546 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 540 | UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || |
| 541 | (texture_type == TextureType::TextureCube && is_array && is_shadow), | ||
| 542 | "This method is not supported."); | ||
| 543 | |||
| 544 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); | ||
| 547 | 545 | ||
| 548 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | 546 | const bool lod_needed = process_mode == TextureProcessMode::LZ || |
| 549 | process_mode == TextureProcessMode::LL || | 547 | process_mode == TextureProcessMode::LL || |
| @@ -552,35 +550,30 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 552 | // LOD selection (either via bias or explicit textureLod) not supported in GL for | 550 | // LOD selection (either via bias or explicit textureLod) not supported in GL for |
| 553 | // sampler2DArrayShadow and samplerCubeArrayShadow. | 551 | // sampler2DArrayShadow and samplerCubeArrayShadow. |
| 554 | const bool gl_lod_supported = | 552 | const bool gl_lod_supported = |
| 555 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || | 553 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || |
| 556 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare)); | 554 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); |
| 557 | 555 | ||
| 558 | const OperationCode read_method = | 556 | const OperationCode read_method = |
| 559 | lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture; | 557 | lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture; |
| 560 | 558 | ||
| 561 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); | 559 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); |
| 562 | 560 | ||
| 563 | std::optional<u32> array_offset_value; | 561 | std::vector<Node> extras; |
| 564 | if (is_array) | ||
| 565 | array_offset_value = static_cast<u32>(array_offset); | ||
| 566 | |||
| 567 | const auto coords_count = static_cast<u32>(coords.size()); | ||
| 568 | |||
| 569 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { | 562 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { |
| 570 | if (process_mode == TextureProcessMode::LZ) { | 563 | if (process_mode == TextureProcessMode::LZ) { |
| 571 | coords.push_back(Immediate(0.0f)); | 564 | extras.push_back(Immediate(0.0f)); |
| 572 | } else { | 565 | } else { |
| 573 | // If present, lod or bias are always stored in the register indexed by the gpr20 | 566 | // If present, lod or bias are always stored in the register indexed by the gpr20 |
| 574 | // field with an offset depending on the usage of the other registers | 567 | // field with an offset depending on the usage of the other registers |
| 575 | coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); | 568 | extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); |
| 576 | } | 569 | } |
| 577 | } | 570 | } |
| 578 | 571 | ||
| 579 | Node4 values; | 572 | Node4 values; |
| 580 | for (u32 element = 0; element < values.size(); ++element) { | 573 | for (u32 element = 0; element < values.size(); ++element) { |
| 581 | auto params = coords; | 574 | auto copy_coords = coords; |
| 582 | MetaTexture meta{sampler, element, coords_count, array_offset_value}; | 575 | MetaTexture meta{sampler, array, depth_compare, extras, element}; |
| 583 | values[element] = Operation(read_method, std::move(meta), std::move(params)); | 576 | values[element] = Operation(read_method, meta, std::move(copy_coords)); |
| 584 | } | 577 | } |
| 585 | 578 | ||
| 586 | return values; | 579 | return values; |
| @@ -602,28 +595,22 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | |||
| 602 | for (std::size_t i = 0; i < coord_count; ++i) { | 595 | for (std::size_t i = 0; i < coord_count; ++i) { |
| 603 | coords.push_back(GetRegister(coord_register + i)); | 596 | coords.push_back(GetRegister(coord_register + i)); |
| 604 | } | 597 | } |
| 605 | // 1D.DC in opengl the 2nd component is ignored. | 598 | // 1D.DC in OpenGL the 2nd component is ignored. |
| 606 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { | 599 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { |
| 607 | coords.push_back(Immediate(0.0f)); | 600 | coords.push_back(Immediate(0.0f)); |
| 608 | } | 601 | } |
| 609 | std::size_t array_offset{}; | 602 | |
| 610 | if (is_array) { | 603 | const Node array = is_array ? GetRegister(array_register) : nullptr; |
| 611 | array_offset = coords.size(); | 604 | |
| 612 | coords.push_back(GetRegister(array_register)); | 605 | Node dc{}; |
| 613 | } | ||
| 614 | if (depth_compare) { | 606 | if (depth_compare) { |
| 615 | // Depth is always stored in the register signaled by gpr20 | 607 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod |
| 616 | // or in the next register if lod or bias are used | 608 | // or bias are used |
| 617 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | 609 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); |
| 618 | coords.push_back(GetRegister(depth_register)); | 610 | dc = GetRegister(depth_register); |
| 619 | } | ||
| 620 | // Fill ignored coordinates | ||
| 621 | while (coords.size() < total_coord_count) { | ||
| 622 | coords.push_back(Immediate(0)); | ||
| 623 | } | 611 | } |
| 624 | 612 | ||
| 625 | return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, | 613 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); |
| 626 | 0, std::move(coords)); | ||
| 627 | } | 614 | } |
| 628 | 615 | ||
| 629 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | 616 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, |
| @@ -641,6 +628,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 641 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | 628 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) |
| 642 | ? static_cast<u64>(instr.gpr20.Value()) | 629 | ? static_cast<u64>(instr.gpr20.Value()) |
| 643 | : coord_register + 1; | 630 | : coord_register + 1; |
| 631 | const u32 bias_offset = coord_count > 2 ? 1 : 0; | ||
| 644 | 632 | ||
| 645 | std::vector<Node> coords; | 633 | std::vector<Node> coords; |
| 646 | for (std::size_t i = 0; i < coord_count; ++i) { | 634 | for (std::size_t i = 0; i < coord_count; ++i) { |
| @@ -648,24 +636,17 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 648 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | 636 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); |
| 649 | } | 637 | } |
| 650 | 638 | ||
| 651 | std::size_t array_offset{}; | 639 | const Node array = is_array ? GetRegister(array_register) : nullptr; |
| 652 | if (is_array) { | 640 | |
| 653 | array_offset = coords.size(); | 641 | Node dc{}; |
| 654 | coords.push_back(GetRegister(array_register)); | ||
| 655 | } | ||
| 656 | if (depth_compare) { | 642 | if (depth_compare) { |
| 657 | // Depth is always stored in the register signaled by gpr20 | 643 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod |
| 658 | // or in the next register if lod or bias are used | 644 | // or bias are used |
| 659 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | 645 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); |
| 660 | coords.push_back(GetRegister(depth_register)); | 646 | dc = GetRegister(depth_register); |
| 661 | } | ||
| 662 | // Fill ignored coordinates | ||
| 663 | while (coords.size() < total_coord_count) { | ||
| 664 | coords.push_back(Immediate(0)); | ||
| 665 | } | 647 | } |
| 666 | 648 | ||
| 667 | return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, | 649 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); |
| 668 | (coord_count > 2 ? 1 : 0), std::move(coords)); | ||
| 669 | } | 650 | } |
| 670 | 651 | ||
| 671 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | 652 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, |
| @@ -680,24 +661,16 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 680 | const u64 coord_register = array_register + (is_array ? 1 : 0); | 661 | const u64 coord_register = array_register + (is_array ? 1 : 0); |
| 681 | 662 | ||
| 682 | std::vector<Node> coords; | 663 | std::vector<Node> coords; |
| 683 | 664 | for (size_t i = 0; i < coord_count; ++i) | |
| 684 | for (size_t i = 0; i < coord_count; ++i) { | ||
| 685 | coords.push_back(GetRegister(coord_register + i)); | 665 | coords.push_back(GetRegister(coord_register + i)); |
| 686 | } | ||
| 687 | std::optional<u32> array_offset; | ||
| 688 | if (is_array) { | ||
| 689 | array_offset = static_cast<u32>(coords.size()); | ||
| 690 | coords.push_back(GetRegister(array_register)); | ||
| 691 | } | ||
| 692 | 666 | ||
| 693 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 667 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); |
| 694 | 668 | ||
| 695 | Node4 values; | 669 | Node4 values; |
| 696 | for (u32 element = 0; element < values.size(); ++element) { | 670 | for (u32 element = 0; element < values.size(); ++element) { |
| 697 | auto params = coords; | 671 | auto coords_copy = coords; |
| 698 | MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset}; | 672 | MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element}; |
| 699 | values[element] = | 673 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 700 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 701 | } | 674 | } |
| 702 | 675 | ||
| 703 | return values; | 676 | return values; |
| @@ -705,7 +678,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 705 | 678 | ||
| 706 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | 679 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { |
| 707 | const std::size_t type_coord_count = GetCoordCount(texture_type); | 680 | const std::size_t type_coord_count = GetCoordCount(texture_type); |
| 708 | const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); | ||
| 709 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | 681 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; |
| 710 | 682 | ||
| 711 | // If enabled arrays index is always stored in the gpr8 field | 683 | // If enabled arrays index is always stored in the gpr8 field |
| @@ -719,33 +691,22 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 719 | : coord_register + 1; | 691 | : coord_register + 1; |
| 720 | 692 | ||
| 721 | std::vector<Node> coords; | 693 | std::vector<Node> coords; |
| 722 | |||
| 723 | for (std::size_t i = 0; i < type_coord_count; ++i) { | 694 | for (std::size_t i = 0; i < type_coord_count; ++i) { |
| 724 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | 695 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); |
| 725 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | 696 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); |
| 726 | } | 697 | } |
| 727 | std::optional<u32> array_offset; | ||
| 728 | if (is_array) { | ||
| 729 | array_offset = static_cast<u32>(coords.size()); | ||
| 730 | coords.push_back(GetRegister(array_register)); | ||
| 731 | } | ||
| 732 | const auto coords_count = static_cast<u32>(coords.size()); | ||
| 733 | 698 | ||
| 734 | if (lod_enabled) { | 699 | const Node array = is_array ? GetRegister(array_register) : nullptr; |
| 735 | // When lod is used always is in grp20 | 700 | // When lod is used always is in gpr20 |
| 736 | coords.push_back(GetRegister(instr.gpr20)); | 701 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |
| 737 | } else { | ||
| 738 | coords.push_back(Immediate(0)); | ||
| 739 | } | ||
| 740 | 702 | ||
| 741 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | 703 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); |
| 742 | 704 | ||
| 743 | Node4 values; | 705 | Node4 values; |
| 744 | for (u32 element = 0; element < values.size(); ++element) { | 706 | for (u32 element = 0; element < values.size(); ++element) { |
| 745 | auto params = coords; | 707 | auto coords_copy = coords; |
| 746 | MetaTexture meta{sampler, element, coords_count, array_offset}; | 708 | MetaTexture meta{sampler, array, {}, {lod}, element}; |
| 747 | values[element] = | 709 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |
| 748 | Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); | ||
| 749 | } | 710 | } |
| 750 | return values; | 711 | return values; |
| 751 | } | 712 | } |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 1d4fbef53..52c7f2c4e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -156,12 +156,12 @@ enum class OperationCode { | |||
| 156 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 156 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 157 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 157 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 158 | 158 | ||
| 159 | F4Texture, /// (MetaTexture, float[N] coords, float[M] params) -> float4 | 159 | Texture, /// (MetaTexture, float[N] coords) -> float4 |
| 160 | F4TextureLod, /// (MetaTexture, float[N] coords, float[M] params) -> float4 | 160 | TextureLod, /// (MetaTexture, float[N] coords) -> float4 |
| 161 | F4TextureGather, /// (MetaTexture, float[N] coords, float[M] params) -> float4 | 161 | TextureGather, /// (MetaTexture, float[N] coords) -> float4 |
| 162 | F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4 | 162 | TextureQueryDimensions, /// (MetaTexture, float a) -> float4 |
| 163 | F4TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 | 163 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 |
| 164 | F4TexelFetch, /// (MetaTexture, int[N], int) -> float4 | 164 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 |
| 165 | 165 | ||
| 166 | Branch, /// (uint branch_target) -> void | 166 | Branch, /// (uint branch_target) -> void |
| 167 | PushFlowStack, /// (uint branch_target) -> void | 167 | PushFlowStack, /// (uint branch_target) -> void |
| @@ -288,9 +288,10 @@ struct MetaHalfArithmetic { | |||
| 288 | 288 | ||
| 289 | struct MetaTexture { | 289 | struct MetaTexture { |
| 290 | const Sampler& sampler; | 290 | const Sampler& sampler; |
| 291 | Node array{}; | ||
| 292 | Node depth_compare{}; | ||
| 293 | std::vector<Node> extras; | ||
| 291 | u32 element{}; | 294 | u32 element{}; |
| 292 | u32 coords_count{}; | ||
| 293 | std::optional<u32> array_index; | ||
| 294 | }; | 295 | }; |
| 295 | 296 | ||
| 296 | constexpr MetaArithmetic PRECISE = {true}; | 297 | constexpr MetaArithmetic PRECISE = {true}; |
| @@ -754,9 +755,8 @@ private: | |||
| 754 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | 755 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); |
| 755 | 756 | ||
| 756 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 757 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 757 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 758 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, |
| 758 | bool is_array, std::size_t array_offset, std::size_t bias_offset, | 759 | Node array, Node depth_compare, u32 bias_offset); |
| 759 | std::vector<Node>&& coords); | ||
| 760 | 760 | ||
| 761 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | 761 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, |
| 762 | u64 byte_height); | 762 | u64 byte_height); |