Diffstat (limited to 'src/shader_recompiler/ir_opt/texture_pass.cpp')
-rw-r--r--   src/shader_recompiler/ir_opt/texture_pass.cpp   523
1 file changed, 523 insertions, 0 deletions
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
new file mode 100644
index 000000000..44ad10d43
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -0,0 +1,523 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <bit>
#include <optional>

#include <boost/container/small_vector.hpp>

#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/ir_opt/passes.h"
#include "shader_recompiler/shader_info.h"

namespace Shader::Optimization {
namespace {
struct ConstBufferAddr {
    u32 index;
    u32 offset;
    u32 secondary_index;
    u32 secondary_offset;
    IR::U32 dynamic_offset;
    u32 count;
    bool has_secondary;
};

struct TextureInst {
    ConstBufferAddr cbuf;
    IR::Inst* inst;
    IR::Block* block;
};

using TextureInstVector = boost::container::small_vector<TextureInst, 24>;

constexpr u32 DESCRIPTOR_SIZE = 8;
constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));

IR::Opcode IndexedInstruction(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::BindlessImageSampleImplicitLod:
    case IR::Opcode::BoundImageSampleImplicitLod:
        return IR::Opcode::ImageSampleImplicitLod;
    case IR::Opcode::BoundImageSampleExplicitLod:
    case IR::Opcode::BindlessImageSampleExplicitLod:
        return IR::Opcode::ImageSampleExplicitLod;
    case IR::Opcode::BoundImageSampleDrefImplicitLod:
    case IR::Opcode::BindlessImageSampleDrefImplicitLod:
        return IR::Opcode::ImageSampleDrefImplicitLod;
    case IR::Opcode::BoundImageSampleDrefExplicitLod:
    case IR::Opcode::BindlessImageSampleDrefExplicitLod:
        return IR::Opcode::ImageSampleDrefExplicitLod;
    case IR::Opcode::BindlessImageGather:
    case IR::Opcode::BoundImageGather:
        return IR::Opcode::ImageGather;
    case IR::Opcode::BindlessImageGatherDref:
    case IR::Opcode::BoundImageGatherDref:
        return IR::Opcode::ImageGatherDref;
    case IR::Opcode::BindlessImageFetch:
    case IR::Opcode::BoundImageFetch:
        return IR::Opcode::ImageFetch;
    case IR::Opcode::BoundImageQueryDimensions:
    case IR::Opcode::BindlessImageQueryDimensions:
        return IR::Opcode::ImageQueryDimensions;
    case IR::Opcode::BoundImageQueryLod:
    case IR::Opcode::BindlessImageQueryLod:
        return IR::Opcode::ImageQueryLod;
    case IR::Opcode::BoundImageGradient:
    case IR::Opcode::BindlessImageGradient:
        return IR::Opcode::ImageGradient;
    case IR::Opcode::BoundImageRead:
    case IR::Opcode::BindlessImageRead:
        return IR::Opcode::ImageRead;
    case IR::Opcode::BoundImageWrite:
    case IR::Opcode::BindlessImageWrite:
        return IR::Opcode::ImageWrite;
    case IR::Opcode::BoundImageAtomicIAdd32:
    case IR::Opcode::BindlessImageAtomicIAdd32:
        return IR::Opcode::ImageAtomicIAdd32;
    case IR::Opcode::BoundImageAtomicSMin32:
    case IR::Opcode::BindlessImageAtomicSMin32:
        return IR::Opcode::ImageAtomicSMin32;
    case IR::Opcode::BoundImageAtomicUMin32:
    case IR::Opcode::BindlessImageAtomicUMin32:
        return IR::Opcode::ImageAtomicUMin32;
    case IR::Opcode::BoundImageAtomicSMax32:
    case IR::Opcode::BindlessImageAtomicSMax32:
        return IR::Opcode::ImageAtomicSMax32;
    case IR::Opcode::BoundImageAtomicUMax32:
    case IR::Opcode::BindlessImageAtomicUMax32:
        return IR::Opcode::ImageAtomicUMax32;
    case IR::Opcode::BoundImageAtomicInc32:
    case IR::Opcode::BindlessImageAtomicInc32:
        return IR::Opcode::ImageAtomicInc32;
    case IR::Opcode::BoundImageAtomicDec32:
    case IR::Opcode::BindlessImageAtomicDec32:
        return IR::Opcode::ImageAtomicDec32;
    case IR::Opcode::BoundImageAtomicAnd32:
    case IR::Opcode::BindlessImageAtomicAnd32:
        return IR::Opcode::ImageAtomicAnd32;
    case IR::Opcode::BoundImageAtomicOr32:
    case IR::Opcode::BindlessImageAtomicOr32:
        return IR::Opcode::ImageAtomicOr32;
    case IR::Opcode::BoundImageAtomicXor32:
    case IR::Opcode::BindlessImageAtomicXor32:
        return IR::Opcode::ImageAtomicXor32;
    case IR::Opcode::BoundImageAtomicExchange32:
    case IR::Opcode::BindlessImageAtomicExchange32:
        return IR::Opcode::ImageAtomicExchange32;
    default:
        return IR::Opcode::Void;
    }
}

bool IsBindless(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::BindlessImageSampleImplicitLod:
    case IR::Opcode::BindlessImageSampleExplicitLod:
    case IR::Opcode::BindlessImageSampleDrefImplicitLod:
    case IR::Opcode::BindlessImageSampleDrefExplicitLod:
    case IR::Opcode::BindlessImageGather:
    case IR::Opcode::BindlessImageGatherDref:
    case IR::Opcode::BindlessImageFetch:
    case IR::Opcode::BindlessImageQueryDimensions:
    case IR::Opcode::BindlessImageQueryLod:
    case IR::Opcode::BindlessImageGradient:
    case IR::Opcode::BindlessImageRead:
    case IR::Opcode::BindlessImageWrite:
    case IR::Opcode::BindlessImageAtomicIAdd32:
    case IR::Opcode::BindlessImageAtomicSMin32:
    case IR::Opcode::BindlessImageAtomicUMin32:
    case IR::Opcode::BindlessImageAtomicSMax32:
    case IR::Opcode::BindlessImageAtomicUMax32:
    case IR::Opcode::BindlessImageAtomicInc32:
    case IR::Opcode::BindlessImageAtomicDec32:
    case IR::Opcode::BindlessImageAtomicAnd32:
    case IR::Opcode::BindlessImageAtomicOr32:
    case IR::Opcode::BindlessImageAtomicXor32:
    case IR::Opcode::BindlessImageAtomicExchange32:
        return true;
    case IR::Opcode::BoundImageSampleImplicitLod:
    case IR::Opcode::BoundImageSampleExplicitLod:
    case IR::Opcode::BoundImageSampleDrefImplicitLod:
    case IR::Opcode::BoundImageSampleDrefExplicitLod:
    case IR::Opcode::BoundImageGather:
    case IR::Opcode::BoundImageGatherDref:
    case IR::Opcode::BoundImageFetch:
    case IR::Opcode::BoundImageQueryDimensions:
    case IR::Opcode::BoundImageQueryLod:
    case IR::Opcode::BoundImageGradient:
    case IR::Opcode::BoundImageRead:
    case IR::Opcode::BoundImageWrite:
    case IR::Opcode::BoundImageAtomicIAdd32:
    case IR::Opcode::BoundImageAtomicSMin32:
    case IR::Opcode::BoundImageAtomicUMin32:
    case IR::Opcode::BoundImageAtomicSMax32:
    case IR::Opcode::BoundImageAtomicUMax32:
    case IR::Opcode::BoundImageAtomicInc32:
    case IR::Opcode::BoundImageAtomicDec32:
    case IR::Opcode::BoundImageAtomicAnd32:
    case IR::Opcode::BoundImageAtomicOr32:
    case IR::Opcode::BoundImageAtomicXor32:
    case IR::Opcode::BoundImageAtomicExchange32:
        return false;
    default:
        throw InvalidArgument("Invalid opcode {}", inst.GetOpcode());
    }
}

bool IsTextureInstruction(const IR::Inst& inst) {
    return IndexedInstruction(inst) != IR::Opcode::Void;
}

std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst);

std::optional<ConstBufferAddr> Track(const IR::Value& value) {
    return IR::BreadthFirstSearch(value, TryGetConstBuffer);
}

std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
    switch (inst->GetOpcode()) {
    default:
        return std::nullopt;
    case IR::Opcode::BitwiseOr32: {
        std::optional lhs{Track(inst->Arg(0))};
        std::optional rhs{Track(inst->Arg(1))};
        if (!lhs || !rhs) {
            return std::nullopt;
        }
        if (lhs->has_secondary || rhs->has_secondary) {
            return std::nullopt;
        }
        if (lhs->count > 1 || rhs->count > 1) {
            return std::nullopt;
        }
        if (lhs->index > rhs->index || lhs->offset > rhs->offset) {
            std::swap(lhs, rhs);
        }
        return ConstBufferAddr{
            .index = lhs->index,
            .offset = lhs->offset,
            .secondary_index = rhs->index,
            .secondary_offset = rhs->offset,
            .dynamic_offset = {},
            .count = 1,
            .has_secondary = true,
        };
    }
    case IR::Opcode::GetCbufU32x2:
    case IR::Opcode::GetCbufU32:
        break;
    }
    const IR::Value index{inst->Arg(0)};
    const IR::Value offset{inst->Arg(1)};
    if (!index.IsImmediate()) {
        // Reading a bindless texture from variable indices is valid
        // but not supported here at the moment
        return std::nullopt;
    }
    if (offset.IsImmediate()) {
        return ConstBufferAddr{
            .index = index.U32(),
            .offset = offset.U32(),
            .secondary_index = 0,
            .secondary_offset = 0,
            .dynamic_offset = {},
            .count = 1,
            .has_secondary = false,
        };
    }
    IR::Inst* const offset_inst{offset.InstRecursive()};
    if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) {
        return std::nullopt;
    }
    u32 base_offset{};
    IR::U32 dynamic_offset;
    if (offset_inst->Arg(0).IsImmediate()) {
        base_offset = offset_inst->Arg(0).U32();
        dynamic_offset = IR::U32{offset_inst->Arg(1)};
    } else if (offset_inst->Arg(1).IsImmediate()) {
        base_offset = offset_inst->Arg(1).U32();
        dynamic_offset = IR::U32{offset_inst->Arg(0)};
    } else {
        return std::nullopt;
    }
    return ConstBufferAddr{
        .index = index.U32(),
        .offset = base_offset,
        .secondary_index = 0,
        .secondary_offset = 0,
        .dynamic_offset = dynamic_offset,
        .count = 8,
        .has_secondary = false,
    };
}

TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
    ConstBufferAddr addr;
    if (IsBindless(inst)) {
        const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))};
        if (!track_addr) {
            throw NotImplementedException("Failed to track bindless texture constant buffer");
        }
        addr = *track_addr;
    } else {
        addr = ConstBufferAddr{
            .index = env.TextureBoundBuffer(),
            .offset = inst.Arg(0).U32(),
            .secondary_index = 0,
            .secondary_offset = 0,
            .dynamic_offset = {},
            .count = 1,
            .has_secondary = false,
        };
    }
    return TextureInst{
        .cbuf = addr,
        .inst = &inst,
        .block = block,
    };
}

TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
    const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
    const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
    const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)};
    const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)};
    return env.ReadTextureType(lhs_raw | rhs_raw);
}

class Descriptors {
public:
    explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
                         ImageBufferDescriptors& image_buffer_descriptors_,
                         TextureDescriptors& texture_descriptors_,
                         ImageDescriptors& image_descriptors_)
        : texture_buffer_descriptors{texture_buffer_descriptors_},
          image_buffer_descriptors{image_buffer_descriptors_},
          texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {}

    u32 Add(const TextureBufferDescriptor& desc) {
        return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) {
            return desc.cbuf_index == existing.cbuf_index &&
                   desc.cbuf_offset == existing.cbuf_offset &&
                   desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
                   desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
                   desc.count == existing.count && desc.size_shift == existing.size_shift &&
                   desc.has_secondary == existing.has_secondary;
        });
    }

    u32 Add(const ImageBufferDescriptor& desc) {
        const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
            return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
                   desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
                   desc.size_shift == existing.size_shift;
        })};
        image_buffer_descriptors[index].is_written |= desc.is_written;
        image_buffer_descriptors[index].is_read |= desc.is_read;
        return index;
    }

    u32 Add(const TextureDescriptor& desc) {
        return Add(texture_descriptors, desc, [&desc](const auto& existing) {
            return desc.type == existing.type && desc.is_depth == existing.is_depth &&
                   desc.has_secondary == existing.has_secondary &&
                   desc.cbuf_index == existing.cbuf_index &&
                   desc.cbuf_offset == existing.cbuf_offset &&
                   desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
                   desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
                   desc.count == existing.count && desc.size_shift == existing.size_shift;
        });
    }

    u32 Add(const ImageDescriptor& desc) {
        const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) {
            return desc.type == existing.type && desc.format == existing.format &&
                   desc.cbuf_index == existing.cbuf_index &&
                   desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
                   desc.size_shift == existing.size_shift;
        })};
        image_descriptors[index].is_written |= desc.is_written;
        image_descriptors[index].is_read |= desc.is_read;
        return index;
    }

private:
    template <typename Descriptors, typename Descriptor, typename Func>
    static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
        // TODO: Handle arrays
        const auto it{std::ranges::find_if(descriptors, pred)};
        if (it != descriptors.end()) {
            return static_cast<u32>(std::distance(descriptors.begin(), it));
        }
        descriptors.push_back(desc);
        return static_cast<u32>(descriptors.size()) - 1;
    }

    TextureBufferDescriptors& texture_buffer_descriptors;
    ImageBufferDescriptors& image_buffer_descriptors;
    TextureDescriptors& texture_descriptors;
    ImageDescriptors& image_descriptors;
};
} // Anonymous namespace

void TexturePass(Environment& env, IR::Program& program) {
    TextureInstVector to_replace;
    for (IR::Block* const block : program.post_order_blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (!IsTextureInstruction(inst)) {
                continue;
            }
            to_replace.push_back(MakeInst(env, block, inst));
        }
    }
    // Sort instructions to visit textures by constant buffer index, then by offset
    std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) {
        return lhs.cbuf.offset < rhs.cbuf.offset;
    });
    std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) {
        return lhs.cbuf.index < rhs.cbuf.index;
    });
    Descriptors descriptors{
        program.info.texture_buffer_descriptors,
        program.info.image_buffer_descriptors,
        program.info.texture_descriptors,
        program.info.image_descriptors,
    };
    for (TextureInst& texture_inst : to_replace) {
        // TODO: Handle arrays
        IR::Inst* const inst{texture_inst.inst};
        inst->ReplaceOpcode(IndexedInstruction(*inst));

        const auto& cbuf{texture_inst.cbuf};
        auto flags{inst->Flags<IR::TextureInstInfo>()};
        switch (inst->GetOpcode()) {
        case IR::Opcode::ImageQueryDimensions:
            flags.type.Assign(ReadTextureType(env, cbuf));
            inst->SetFlags(flags);
            break;
        case IR::Opcode::ImageFetch:
            if (flags.type != TextureType::Color1D) {
                break;
            }
            if (ReadTextureType(env, cbuf) == TextureType::Buffer) {
                // Replace with the bound texture type only when it's a texture buffer
                // If the instruction is 1D and the bound type is 2D, don't change the code and let
                // the rasterizer robustness handle it
                // This happens on Fire Emblem: Three Houses
                flags.type.Assign(TextureType::Buffer);
            }
            break;
        default:
            break;
        }
        u32 index;
        switch (inst->GetOpcode()) {
        case IR::Opcode::ImageRead:
        case IR::Opcode::ImageAtomicIAdd32:
        case IR::Opcode::ImageAtomicSMin32:
        case IR::Opcode::ImageAtomicUMin32:
        case IR::Opcode::ImageAtomicSMax32:
        case IR::Opcode::ImageAtomicUMax32:
        case IR::Opcode::ImageAtomicInc32:
        case IR::Opcode::ImageAtomicDec32:
        case IR::Opcode::ImageAtomicAnd32:
        case IR::Opcode::ImageAtomicOr32:
        case IR::Opcode::ImageAtomicXor32:
        case IR::Opcode::ImageAtomicExchange32:
        case IR::Opcode::ImageWrite: {
            if (cbuf.has_secondary) {
                throw NotImplementedException("Unexpected separate sampler");
            }
            const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
            const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite};
            if (flags.type == TextureType::Buffer) {
                index = descriptors.Add(ImageBufferDescriptor{
                    .format = flags.image_format,
                    .is_written = is_written,
                    .is_read = is_read,
                    .cbuf_index = cbuf.index,
                    .cbuf_offset = cbuf.offset,
                    .count = cbuf.count,
                    .size_shift = DESCRIPTOR_SIZE_SHIFT,
                });
            } else {
                index = descriptors.Add(ImageDescriptor{
                    .type = flags.type,
                    .format = flags.image_format,
                    .is_written = is_written,
                    .is_read = is_read,
                    .cbuf_index = cbuf.index,
                    .cbuf_offset = cbuf.offset,
                    .count = cbuf.count,
                    .size_shift = DESCRIPTOR_SIZE_SHIFT,
                });
            }
            break;
        }
        default:
            if (flags.type == TextureType::Buffer) {
                index = descriptors.Add(TextureBufferDescriptor{
                    .has_secondary = cbuf.has_secondary,
                    .cbuf_index = cbuf.index,
                    .cbuf_offset = cbuf.offset,
                    .secondary_cbuf_index = cbuf.secondary_index,
                    .secondary_cbuf_offset = cbuf.secondary_offset,
                    .count = cbuf.count,
                    .size_shift = DESCRIPTOR_SIZE_SHIFT,
                });
            } else {
                index = descriptors.Add(TextureDescriptor{
                    .type = flags.type,
                    .is_depth = flags.is_depth != 0,
                    .has_secondary = cbuf.has_secondary,
                    .cbuf_index = cbuf.index,
                    .cbuf_offset = cbuf.offset,
                    .secondary_cbuf_index = cbuf.secondary_index,
                    .secondary_cbuf_offset = cbuf.secondary_offset,
                    .count = cbuf.count,
                    .size_shift = DESCRIPTOR_SIZE_SHIFT,
                });
            }
            break;
        }
        flags.descriptor_index.Assign(index);
        inst->SetFlags(flags);

        if (cbuf.count > 1) {
            const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)};
            IR::IREmitter ir{*texture_inst.block, insert_point};
            const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))};
            inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift));
        } else {
            inst->SetArg(0, IR::Value{});
        }
    }
}

void JoinTextureInfo(Info& base, Info& source) {
    Descriptors descriptors{
        base.texture_buffer_descriptors,
        base.image_buffer_descriptors,
        base.texture_descriptors,
        base.image_descriptors,
    };
    for (auto& desc : source.texture_buffer_descriptors) {
        descriptors.Add(desc);
    }
    for (auto& desc : source.image_buffer_descriptors) {
        descriptors.Add(desc);
    }
    for (auto& desc : source.texture_descriptors) {
        descriptors.Add(desc);
    }
    for (auto& desc : source.image_descriptors) {
        descriptors.Add(desc);
    }
}

} // namespace Shader::Optimization