diff options
| author | 2021-03-27 01:45:20 +0100 | |
|---|---|---|
| committer | 2021-07-22 21:51:25 -0400 | |
| commit | 2c276ec6ebff55fb97262ccb50d1ab6a04b3c06a (patch) | |
| tree | b64953421a84bf93c35f18b60ec7d29833ed0231 | |
| parent | shader: Implement TLD (diff) | |
| download | yuzu-2c276ec6ebff55fb97262ccb50d1ab6a04b3c06a.tar.gz yuzu-2c276ec6ebff55fb97262ccb50d1ab6a04b3c06a.tar.xz yuzu-2c276ec6ebff55fb97262ccb50d1ab6a04b3c06a.zip | |
shader: Implement TLDS
3 files changed, 253 insertions, 4 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 4b4c43ba8..d3afd7d31 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -133,6 +133,7 @@ add_library(shader_recompiler STATIC | |||
| 133 | frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | 133 | frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp |
| 134 | frontend/maxwell/translate/impl/texture_gather_swizzled.cpp | 134 | frontend/maxwell/translate/impl/texture_gather_swizzled.cpp |
| 135 | frontend/maxwell/translate/impl/texture_gather.cpp | 135 | frontend/maxwell/translate/impl/texture_gather.cpp |
| 136 | frontend/maxwell/translate/impl/texture_load_swizzled.cpp | ||
| 136 | frontend/maxwell/translate/impl/texture_load.cpp | 137 | frontend/maxwell/translate/impl/texture_load.cpp |
| 137 | frontend/maxwell/translate/impl/texture_query.cpp | 138 | frontend/maxwell/translate/impl/texture_query.cpp |
| 138 | frontend/maxwell/translate/impl/video_helper.cpp | 139 | frontend/maxwell/translate/impl/video_helper.cpp |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 60d61ec6e..7e1ad63e1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -313,10 +313,6 @@ void TranslatorVisitor::SYNC(u64) { | |||
| 313 | ThrowNotImplemented(Opcode::SYNC); | 313 | ThrowNotImplemented(Opcode::SYNC); |
| 314 | } | 314 | } |
| 315 | 315 | ||
| 316 | void TranslatorVisitor::TLDS(u64) { | ||
| 317 | ThrowNotImplemented(Opcode::TLDS); | ||
| 318 | } | ||
| 319 | |||
| 320 | void TranslatorVisitor::TMML(u64) { | 316 | void TranslatorVisitor::TMML(u64) { |
| 321 | ThrowNotImplemented(Opcode::TMML); | 317 | ThrowNotImplemented(Opcode::TMML); |
| 322 | } | 318 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp new file mode 100644 index 000000000..3e6ebd911 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp | |||
| @@ -0,0 +1,252 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
#include <array>
#include <cstddef>
#include <utility>
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F16, | ||
| 16 | F32, | ||
| 17 | }; | ||
| 18 | |||
// One bit per colour channel; the swizzle lookup tables are built by OR-ing these together.
constexpr unsigned R{1U << 0};
constexpr unsigned G{1U << 1};
constexpr unsigned B{1U << 2};
constexpr unsigned A{1U << 3};
| 23 | |||
| 24 | constexpr std::array RG_LUT{ | ||
| 25 | R, // | ||
| 26 | G, // | ||
| 27 | B, // | ||
| 28 | A, // | ||
| 29 | R | G, // | ||
| 30 | R | A, // | ||
| 31 | G | A, // | ||
| 32 | B | A, // | ||
| 33 | }; | ||
| 34 | |||
| 35 | constexpr std::array RGBA_LUT{ | ||
| 36 | R | G | B, // | ||
| 37 | R | G | A, // | ||
| 38 | R | B | A, // | ||
| 39 | G | B | A, // | ||
| 40 | R | G | B | A, // | ||
| 41 | }; | ||
| 42 | |||
| 43 | union Encoding { | ||
| 44 | u64 raw; | ||
| 45 | BitField<59, 1, Precision> precision; | ||
| 46 | BitField<54, 1, u64> aoffi; | ||
| 47 | BitField<53, 1, u64> lod; | ||
| 48 | BitField<55, 1, u64> ms; | ||
| 49 | BitField<49, 1, u64> nodep; | ||
| 50 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 51 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 52 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 53 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 54 | BitField<36, 13, u64> cbuf_offset; | ||
| 55 | BitField<50, 3, u64> swizzle; | ||
| 56 | BitField<53, 4, u64> encoding; | ||
| 57 | }; | ||
| 58 | |||
| 59 | void CheckAlignment(IR::Reg reg, int alignment) { | ||
| 60 | if (!IR::IsAligned(reg, alignment)) { | ||
| 61 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { | ||
| 66 | const IR::U32 value{v.X(reg)}; | ||
| 67 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 68 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 72 | const Encoding tlds{insn}; | ||
| 73 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))}; | ||
| 74 | const IR::Reg reg_a{tlds.src_reg_a}; | ||
| 75 | const IR::Reg reg_b{tlds.src_reg_b}; | ||
| 76 | IR::Value coords; | ||
| 77 | IR::U32 lod; | ||
| 78 | IR::Value offsets; | ||
| 79 | IR::U32 multisample; | ||
| 80 | Shader::TextureType texture_type; | ||
| 81 | switch (tlds.encoding) { | ||
| 82 | case 0: { | ||
| 83 | texture_type = Shader::TextureType::Color1D; | ||
| 84 | coords = v.X(reg_a); | ||
| 85 | break; | ||
| 86 | } | ||
| 87 | case 1: { | ||
| 88 | texture_type = Shader::TextureType::Color1D; | ||
| 89 | coords = v.X(reg_a); | ||
| 90 | lod = v.X(reg_b); | ||
| 91 | break; | ||
| 92 | } | ||
| 93 | case 2: { | ||
| 94 | texture_type = Shader::TextureType::Color2D; | ||
| 95 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); | ||
| 96 | break; | ||
| 97 | } | ||
| 98 | case 4: { | ||
| 99 | CheckAlignment(reg_a, 2); | ||
| 100 | texture_type = Shader::TextureType::Color2D; | ||
| 101 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 102 | offsets = MakeOffset(v, reg_b); | ||
| 103 | break; | ||
| 104 | } | ||
| 105 | case 5: { | ||
| 106 | CheckAlignment(reg_a, 2); | ||
| 107 | texture_type = Shader::TextureType::Color2D; | ||
| 108 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 109 | lod = v.X(reg_b); | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | case 6: { | ||
| 113 | CheckAlignment(reg_a, 2); | ||
| 114 | texture_type = Shader::TextureType::Color2D; | ||
| 115 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 116 | multisample = v.X(reg_b); | ||
| 117 | break; | ||
| 118 | } | ||
| 119 | case 7: { | ||
| 120 | CheckAlignment(reg_a, 2); | ||
| 121 | texture_type = Shader::TextureType::Color3D; | ||
| 122 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); | ||
| 123 | break; | ||
| 124 | } | ||
| 125 | case 8: { | ||
| 126 | CheckAlignment(reg_b, 2); | ||
| 127 | texture_type = Shader::TextureType::ColorArray2D; | ||
| 128 | IR::U32 array = v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16)); | ||
| 129 | coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | case 12: { | ||
| 133 | CheckAlignment(reg_a, 2); | ||
| 134 | CheckAlignment(reg_b, 2); | ||
| 135 | texture_type = Shader::TextureType::Color2D; | ||
| 136 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 137 | lod = v.X(reg_b); | ||
| 138 | offsets = MakeOffset(v, reg_b + 1); | ||
| 139 | break; | ||
| 140 | } | ||
| 141 | default: { | ||
| 142 | throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | } | ||
| 146 | IR::TextureInstInfo info{}; | ||
| 147 | if (tlds.precision == Precision::F16) { | ||
| 148 | info.relaxed_precision.Assign(1); | ||
| 149 | } | ||
| 150 | info.type.Assign(texture_type); | ||
| 151 | return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); | ||
| 152 | } | ||
| 153 | |||
| 154 | unsigned Swizzle(u64 insn) { | ||
| 155 | const Encoding tlds{insn}; | ||
| 156 | const size_t encoding{tlds.swizzle}; | ||
| 157 | if (tlds.dest_reg_b == IR::Reg::RZ) { | ||
| 158 | if (encoding >= RG_LUT.size()) { | ||
| 159 | throw NotImplementedException("Illegal RG encoding {}", encoding); | ||
| 160 | } | ||
| 161 | return RG_LUT[encoding]; | ||
| 162 | } else { | ||
| 163 | if (encoding >= RGBA_LUT.size()) { | ||
| 164 | throw NotImplementedException("Illegal RGBA encoding {}", encoding); | ||
| 165 | } | ||
| 166 | return RGBA_LUT[encoding]; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { | ||
| 171 | return IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 172 | } | ||
| 173 | |||
| 174 | IR::Reg RegStoreComponent32(u64 insn, unsigned index) { | ||
| 175 | const Encoding tlds{insn}; | ||
| 176 | switch (index) { | ||
| 177 | case 0: | ||
| 178 | return tlds.dest_reg_a; | ||
| 179 | case 1: | ||
| 180 | CheckAlignment(tlds.dest_reg_a, 2); | ||
| 181 | return tlds.dest_reg_a + 1; | ||
| 182 | case 2: | ||
| 183 | return tlds.dest_reg_b; | ||
| 184 | case 3: | ||
| 185 | CheckAlignment(tlds.dest_reg_b, 2); | ||
| 186 | return tlds.dest_reg_b + 1; | ||
| 187 | } | ||
| 188 | throw LogicError("Invalid store index {}", index); | ||
| 189 | } | ||
| 190 | |||
| 191 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 192 | const unsigned swizzle{Swizzle(insn)}; | ||
| 193 | unsigned store_index{0}; | ||
| 194 | for (unsigned component = 0; component < 4; ++component) { | ||
| 195 | if (((swizzle >> component) & 1) == 0) { | ||
| 196 | continue; | ||
| 197 | } | ||
| 198 | const IR::Reg dest{RegStoreComponent32(insn, store_index)}; | ||
| 199 | v.F(dest, Extract(v, sample, component)); | ||
| 200 | ++store_index; | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 205 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 206 | } | ||
| 207 | |||
| 208 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 209 | const unsigned swizzle{Swizzle(insn)}; | ||
| 210 | unsigned store_index{0}; | ||
| 211 | std::array<IR::F32, 4> swizzled; | ||
| 212 | for (unsigned component = 0; component < 4; ++component) { | ||
| 213 | if (((swizzle >> component) & 1) == 0) { | ||
| 214 | continue; | ||
| 215 | } | ||
| 216 | swizzled[store_index] = Extract(v, sample, component); | ||
| 217 | ++store_index; | ||
| 218 | } | ||
| 219 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 220 | const Encoding tlds{insn}; | ||
| 221 | switch (store_index) { | ||
| 222 | case 1: | ||
| 223 | v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero)); | ||
| 224 | break; | ||
| 225 | case 2: | ||
| 226 | case 3: | ||
| 227 | case 4: | ||
| 228 | v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 229 | switch (store_index) { | ||
| 230 | case 2: | ||
| 231 | break; | ||
| 232 | case 3: | ||
| 233 | v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero)); | ||
| 234 | break; | ||
| 235 | case 4: | ||
| 236 | v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 237 | break; | ||
| 238 | } | ||
| 239 | break; | ||
| 240 | } | ||
| 241 | } | ||
| 242 | } // Anonymous namespace | ||
| 243 | |||
| 244 | void TranslatorVisitor::TLDS(u64 insn) { | ||
| 245 | const IR::Value sample{Sample(*this, insn)}; | ||
| 246 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 247 | Store32(*this, insn, sample); | ||
| 248 | } else { | ||
| 249 | Store16(*this, insn, sample); | ||
| 250 | } | ||
| 251 | } | ||
| 252 | } // namespace Shader::Maxwell | ||