diff options
Diffstat (limited to 'src/shader_recompiler/ir_opt')
4 files changed, 146 insertions, 15 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 960beadd4..cdbe85221 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -25,18 +25,13 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { | |||
| 25 | 25 | ||
| 26 | void VisitUsages(Info& info, IR::Inst& inst) { | 26 | void VisitUsages(Info& info, IR::Inst& inst) { |
| 27 | switch (inst.Opcode()) { | 27 | switch (inst.Opcode()) { |
| 28 | case IR::Opcode::WorkgroupId: | ||
| 29 | info.uses_workgroup_id = true; | ||
| 30 | break; | ||
| 31 | case IR::Opcode::LocalInvocationId: | ||
| 32 | info.uses_local_invocation_id = true; | ||
| 33 | break; | ||
| 34 | case IR::Opcode::CompositeConstructF16x2: | 28 | case IR::Opcode::CompositeConstructF16x2: |
| 35 | case IR::Opcode::CompositeConstructF16x3: | 29 | case IR::Opcode::CompositeConstructF16x3: |
| 36 | case IR::Opcode::CompositeConstructF16x4: | 30 | case IR::Opcode::CompositeConstructF16x4: |
| 37 | case IR::Opcode::CompositeExtractF16x2: | 31 | case IR::Opcode::CompositeExtractF16x2: |
| 38 | case IR::Opcode::CompositeExtractF16x3: | 32 | case IR::Opcode::CompositeExtractF16x3: |
| 39 | case IR::Opcode::CompositeExtractF16x4: | 33 | case IR::Opcode::CompositeExtractF16x4: |
| 34 | case IR::Opcode::SelectF16: | ||
| 40 | case IR::Opcode::BitCastU16F16: | 35 | case IR::Opcode::BitCastU16F16: |
| 41 | case IR::Opcode::BitCastF16U16: | 36 | case IR::Opcode::BitCastF16U16: |
| 42 | case IR::Opcode::PackFloat2x16: | 37 | case IR::Opcode::PackFloat2x16: |
| @@ -75,13 +70,139 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 75 | case IR::Opcode::FPTrunc64: | 70 | case IR::Opcode::FPTrunc64: |
| 76 | info.uses_fp64 = true; | 71 | info.uses_fp64 = true; |
| 77 | break; | 72 | break; |
| 78 | case IR::Opcode::GetCbuf: | 73 | default: |
| 74 | break; | ||
| 75 | } | ||
| 76 | switch (inst.Opcode()) { | ||
| 77 | case IR::Opcode::GetCbufU8: | ||
| 78 | case IR::Opcode::GetCbufS8: | ||
| 79 | case IR::Opcode::UndefU8: | ||
| 80 | case IR::Opcode::LoadGlobalU8: | ||
| 81 | case IR::Opcode::LoadGlobalS8: | ||
| 82 | case IR::Opcode::WriteGlobalU8: | ||
| 83 | case IR::Opcode::WriteGlobalS8: | ||
| 84 | case IR::Opcode::LoadStorageU8: | ||
| 85 | case IR::Opcode::LoadStorageS8: | ||
| 86 | case IR::Opcode::WriteStorageU8: | ||
| 87 | case IR::Opcode::WriteStorageS8: | ||
| 88 | case IR::Opcode::SelectU8: | ||
| 89 | info.uses_int8 = true; | ||
| 90 | break; | ||
| 91 | default: | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | switch (inst.Opcode()) { | ||
| 95 | case IR::Opcode::GetCbufU16: | ||
| 96 | case IR::Opcode::GetCbufS16: | ||
| 97 | case IR::Opcode::UndefU16: | ||
| 98 | case IR::Opcode::LoadGlobalU16: | ||
| 99 | case IR::Opcode::LoadGlobalS16: | ||
| 100 | case IR::Opcode::WriteGlobalU16: | ||
| 101 | case IR::Opcode::WriteGlobalS16: | ||
| 102 | case IR::Opcode::LoadStorageU16: | ||
| 103 | case IR::Opcode::LoadStorageS16: | ||
| 104 | case IR::Opcode::WriteStorageU16: | ||
| 105 | case IR::Opcode::WriteStorageS16: | ||
| 106 | case IR::Opcode::SelectU16: | ||
| 107 | case IR::Opcode::BitCastU16F16: | ||
| 108 | case IR::Opcode::BitCastF16U16: | ||
| 109 | case IR::Opcode::ConvertS16F16: | ||
| 110 | case IR::Opcode::ConvertS16F32: | ||
| 111 | case IR::Opcode::ConvertS16F64: | ||
| 112 | case IR::Opcode::ConvertU16F16: | ||
| 113 | case IR::Opcode::ConvertU16F32: | ||
| 114 | case IR::Opcode::ConvertU16F64: | ||
| 115 | info.uses_int16 = true; | ||
| 116 | break; | ||
| 117 | default: | ||
| 118 | break; | ||
| 119 | } | ||
| 120 | switch (inst.Opcode()) { | ||
| 121 | case IR::Opcode::GetCbufU64: | ||
| 122 | case IR::Opcode::UndefU64: | ||
| 123 | case IR::Opcode::LoadGlobalU8: | ||
| 124 | case IR::Opcode::LoadGlobalS8: | ||
| 125 | case IR::Opcode::LoadGlobalU16: | ||
| 126 | case IR::Opcode::LoadGlobalS16: | ||
| 127 | case IR::Opcode::LoadGlobal32: | ||
| 128 | case IR::Opcode::LoadGlobal64: | ||
| 129 | case IR::Opcode::LoadGlobal128: | ||
| 130 | case IR::Opcode::WriteGlobalU8: | ||
| 131 | case IR::Opcode::WriteGlobalS8: | ||
| 132 | case IR::Opcode::WriteGlobalU16: | ||
| 133 | case IR::Opcode::WriteGlobalS16: | ||
| 134 | case IR::Opcode::WriteGlobal32: | ||
| 135 | case IR::Opcode::WriteGlobal64: | ||
| 136 | case IR::Opcode::WriteGlobal128: | ||
| 137 | case IR::Opcode::SelectU64: | ||
| 138 | case IR::Opcode::BitCastU64F64: | ||
| 139 | case IR::Opcode::BitCastF64U64: | ||
| 140 | case IR::Opcode::PackUint2x32: | ||
| 141 | case IR::Opcode::UnpackUint2x32: | ||
| 142 | case IR::Opcode::IAdd64: | ||
| 143 | case IR::Opcode::ISub64: | ||
| 144 | case IR::Opcode::INeg64: | ||
| 145 | case IR::Opcode::ShiftLeftLogical64: | ||
| 146 | case IR::Opcode::ShiftRightLogical64: | ||
| 147 | case IR::Opcode::ShiftRightArithmetic64: | ||
| 148 | case IR::Opcode::ConvertS64F16: | ||
| 149 | case IR::Opcode::ConvertS64F32: | ||
| 150 | case IR::Opcode::ConvertS64F64: | ||
| 151 | case IR::Opcode::ConvertU64F16: | ||
| 152 | case IR::Opcode::ConvertU64F32: | ||
| 153 | case IR::Opcode::ConvertU64F64: | ||
| 154 | case IR::Opcode::ConvertU64U32: | ||
| 155 | case IR::Opcode::ConvertU32U64: | ||
| 156 | case IR::Opcode::ConvertF16U64: | ||
| 157 | case IR::Opcode::ConvertF32U64: | ||
| 158 | case IR::Opcode::ConvertF64U64: | ||
| 159 | info.uses_int64 = true; | ||
| 160 | break; | ||
| 161 | default: | ||
| 162 | break; | ||
| 163 | } | ||
| 164 | switch (inst.Opcode()) { | ||
| 165 | case IR::Opcode::WorkgroupId: | ||
| 166 | info.uses_workgroup_id = true; | ||
| 167 | break; | ||
| 168 | case IR::Opcode::LocalInvocationId: | ||
| 169 | info.uses_local_invocation_id = true; | ||
| 170 | break; | ||
| 171 | case IR::Opcode::GetCbufU8: | ||
| 172 | case IR::Opcode::GetCbufS8: | ||
| 173 | case IR::Opcode::GetCbufU16: | ||
| 174 | case IR::Opcode::GetCbufS16: | ||
| 175 | case IR::Opcode::GetCbufU32: | ||
| 176 | case IR::Opcode::GetCbufF32: | ||
| 177 | case IR::Opcode::GetCbufU64: { | ||
| 79 | if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { | 178 | if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { |
| 80 | AddConstantBufferDescriptor(info, index.U32(), 1); | 179 | AddConstantBufferDescriptor(info, index.U32(), 1); |
| 81 | } else { | 180 | } else { |
| 82 | throw NotImplementedException("Constant buffer with non-immediate index"); | 181 | throw NotImplementedException("Constant buffer with non-immediate index"); |
| 83 | } | 182 | } |
| 183 | switch (inst.Opcode()) { | ||
| 184 | case IR::Opcode::GetCbufU8: | ||
| 185 | case IR::Opcode::GetCbufS8: | ||
| 186 | info.used_constant_buffer_types |= IR::Type::U8; | ||
| 187 | break; | ||
| 188 | case IR::Opcode::GetCbufU16: | ||
| 189 | case IR::Opcode::GetCbufS16: | ||
| 190 | info.used_constant_buffer_types |= IR::Type::U16; | ||
| 191 | break; | ||
| 192 | case IR::Opcode::GetCbufU32: | ||
| 193 | info.used_constant_buffer_types |= IR::Type::U32; | ||
| 194 | break; | ||
| 195 | case IR::Opcode::GetCbufF32: | ||
| 196 | info.used_constant_buffer_types |= IR::Type::F32; | ||
| 197 | break; | ||
| 198 | case IR::Opcode::GetCbufU64: | ||
| 199 | info.used_constant_buffer_types |= IR::Type::U64; | ||
| 200 | break; | ||
| 201 | default: | ||
| 202 | break; | ||
| 203 | } | ||
| 84 | break; | 204 | break; |
| 205 | } | ||
| 85 | case IR::Opcode::BindlessImageSampleImplicitLod: | 206 | case IR::Opcode::BindlessImageSampleImplicitLod: |
| 86 | case IR::Opcode::BindlessImageSampleExplicitLod: | 207 | case IR::Opcode::BindlessImageSampleExplicitLod: |
| 87 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | 208 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: |
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index ae3d5a7d6..7ba9ebe9b 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -193,7 +193,7 @@ void FoldISub32(IR::Inst& inst) { | |||
| 193 | // ISub32 is generally used to subtract two constant buffers, compare and replace this with | 193 | // ISub32 is generally used to subtract two constant buffers, compare and replace this with |
| 194 | // zero if they equal. | 194 | // zero if they equal. |
| 195 | const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { | 195 | const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { |
| 196 | return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf && | 196 | return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 && |
| 197 | a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); | 197 | a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); |
| 198 | }}; | 198 | }}; |
| 199 | IR::Inst* op_a{inst.Arg(0).InstRecursive()}; | 199 | IR::Inst* op_a{inst.Arg(0).InstRecursive()}; |
| @@ -207,7 +207,7 @@ void FoldISub32(IR::Inst& inst) { | |||
| 207 | // Canonicalize local variables to simplify the following logic | 207 | // Canonicalize local variables to simplify the following logic |
| 208 | std::swap(op_a, op_b); | 208 | std::swap(op_a, op_b); |
| 209 | } | 209 | } |
| 210 | if (op_b->Opcode() != IR::Opcode::GetCbuf) { | 210 | if (op_b->Opcode() != IR::Opcode::GetCbufU32) { |
| 211 | return; | 211 | return; |
| 212 | } | 212 | } |
| 213 | IR::Inst* const inst_cbuf{op_b}; | 213 | IR::Inst* const inst_cbuf{op_b}; |
| @@ -277,7 +277,7 @@ void FoldLogicalNot(IR::Inst& inst) { | |||
| 277 | } | 277 | } |
| 278 | } | 278 | } |
| 279 | 279 | ||
| 280 | template <typename Dest, typename Source> | 280 | template <IR::Opcode op, typename Dest, typename Source> |
| 281 | void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { | 281 | void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { |
| 282 | const IR::Value value{inst.Arg(0)}; | 282 | const IR::Value value{inst.Arg(0)}; |
| 283 | if (value.IsImmediate()) { | 283 | if (value.IsImmediate()) { |
| @@ -285,8 +285,18 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { | |||
| 285 | return; | 285 | return; |
| 286 | } | 286 | } |
| 287 | IR::Inst* const arg_inst{value.InstRecursive()}; | 287 | IR::Inst* const arg_inst{value.InstRecursive()}; |
| 288 | if (value.InstRecursive()->Opcode() == reverse) { | 288 | if (arg_inst->Opcode() == reverse) { |
| 289 | inst.ReplaceUsesWith(arg_inst->Arg(0)); | 289 | inst.ReplaceUsesWith(arg_inst->Arg(0)); |
| 290 | return; | ||
| 291 | } | ||
| 292 | if constexpr (op == IR::Opcode::BitCastF32U32) { | ||
| 293 | if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) { | ||
| 294 | // Replace the bitcast with a typed constant buffer read | ||
| 295 | inst.ReplaceOpcode(IR::Opcode::GetCbufF32); | ||
| 296 | inst.SetArg(0, arg_inst->Arg(0)); | ||
| 297 | inst.SetArg(1, arg_inst->Arg(1)); | ||
| 298 | return; | ||
| 299 | } | ||
| 290 | } | 300 | } |
| 291 | } | 301 | } |
| 292 | 302 | ||
| @@ -325,9 +335,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | |||
| 325 | case IR::Opcode::ISub32: | 335 | case IR::Opcode::ISub32: |
| 326 | return FoldISub32(inst); | 336 | return FoldISub32(inst); |
| 327 | case IR::Opcode::BitCastF32U32: | 337 | case IR::Opcode::BitCastF32U32: |
| 328 | return FoldBitCast<f32, u32>(inst, IR::Opcode::BitCastU32F32); | 338 | return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32); |
| 329 | case IR::Opcode::BitCastU32F32: | 339 | case IR::Opcode::BitCastU32F32: |
| 330 | return FoldBitCast<u32, f32>(inst, IR::Opcode::BitCastF32U32); | 340 | return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32); |
| 331 | case IR::Opcode::IAdd64: | 341 | case IR::Opcode::IAdd64: |
| 332 | return FoldAdd<u64>(block, inst); | 342 | return FoldAdd<u64>(block, inst); |
| 333 | case IR::Opcode::SelectU32: | 343 | case IR::Opcode::SelectU32: |
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 2625c0bb2..5d98d278e 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -203,7 +203,7 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value, | |||
| 203 | return std::nullopt; | 203 | return std::nullopt; |
| 204 | } | 204 | } |
| 205 | const IR::Inst* const inst{value.InstRecursive()}; | 205 | const IR::Inst* const inst{value.InstRecursive()}; |
| 206 | if (inst->Opcode() == IR::Opcode::GetCbuf) { | 206 | if (inst->Opcode() == IR::Opcode::GetCbufU32) { |
| 207 | const IR::Value index{inst->Arg(0)}; | 207 | const IR::Value index{inst->Arg(0)}; |
| 208 | const IR::Value offset{inst->Arg(1)}; | 208 | const IR::Value offset{inst->Arg(1)}; |
| 209 | if (!index.IsImmediate()) { | 209 | if (!index.IsImmediate()) { |
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 80e4ad6a9..ec802e02c 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp | |||
| @@ -78,7 +78,7 @@ std::optional<ConstBufferAddr> Track(IR::Block* block, const IR::Value& value, | |||
| 78 | return std::nullopt; | 78 | return std::nullopt; |
| 79 | } | 79 | } |
| 80 | const IR::Inst* const inst{value.InstRecursive()}; | 80 | const IR::Inst* const inst{value.InstRecursive()}; |
| 81 | if (inst->Opcode() == IR::Opcode::GetCbuf) { | 81 | if (inst->Opcode() == IR::Opcode::GetCbufU32) { |
| 82 | const IR::Value index{inst->Arg(0)}; | 82 | const IR::Value index{inst->Arg(0)}; |
| 83 | const IR::Value offset{inst->Arg(1)}; | 83 | const IR::Value offset{inst->Arg(1)}; |
| 84 | if (!index.IsImmediate()) { | 84 | if (!index.IsImmediate()) { |