diff options
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 18 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 165 |
2 files changed, 108 insertions, 75 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index ac50bb622..6cd08d28b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -214,7 +214,7 @@ enum class IMinMaxExchange : u64 { | |||
| 214 | XHi = 3, | 214 | XHi = 3, |
| 215 | }; | 215 | }; |
| 216 | 216 | ||
| 217 | enum class VmadType : u64 { | 217 | enum class VideoType : u64 { |
| 218 | Size16_Low = 0, | 218 | Size16_Low = 0, |
| 219 | Size16_High = 1, | 219 | Size16_High = 1, |
| 220 | Size32 = 2, | 220 | Size32 = 2, |
| @@ -783,6 +783,14 @@ union Instruction { | |||
| 783 | } psetp; | 783 | } psetp; |
| 784 | 784 | ||
| 785 | union { | 785 | union { |
| 786 | BitField<43, 4, PredCondition> cond; | ||
| 787 | BitField<45, 2, PredOperation> op; | ||
| 788 | BitField<3, 3, u64> pred3; | ||
| 789 | BitField<0, 3, u64> pred0; | ||
| 790 | BitField<39, 3, u64> pred39; | ||
| 791 | } vsetp; | ||
| 792 | |||
| 793 | union { | ||
| 786 | BitField<12, 3, u64> pred12; | 794 | BitField<12, 3, u64> pred12; |
| 787 | BitField<15, 1, u64> neg_pred12; | 795 | BitField<15, 1, u64> neg_pred12; |
| 788 | BitField<24, 2, PredOperation> cond; | 796 | BitField<24, 2, PredOperation> cond; |
| @@ -1154,15 +1162,17 @@ union Instruction { | |||
| 1154 | union { | 1162 | union { |
| 1155 | BitField<48, 1, u64> signed_a; | 1163 | BitField<48, 1, u64> signed_a; |
| 1156 | BitField<38, 1, u64> is_byte_chunk_a; | 1164 | BitField<38, 1, u64> is_byte_chunk_a; |
| 1157 | BitField<36, 2, VmadType> type_a; | 1165 | BitField<36, 2, VideoType> type_a; |
| 1158 | BitField<36, 2, u64> byte_height_a; | 1166 | BitField<36, 2, u64> byte_height_a; |
| 1159 | 1167 | ||
| 1160 | BitField<49, 1, u64> signed_b; | 1168 | BitField<49, 1, u64> signed_b; |
| 1161 | BitField<50, 1, u64> use_register_b; | 1169 | BitField<50, 1, u64> use_register_b; |
| 1162 | BitField<30, 1, u64> is_byte_chunk_b; | 1170 | BitField<30, 1, u64> is_byte_chunk_b; |
| 1163 | BitField<28, 2, VmadType> type_b; | 1171 | BitField<28, 2, VideoType> type_b; |
| 1164 | BitField<28, 2, u64> byte_height_b; | 1172 | BitField<28, 2, u64> byte_height_b; |
| 1173 | } video; | ||
| 1165 | 1174 | ||
| 1175 | union { | ||
| 1166 | BitField<51, 2, VmadShr> shr; | 1176 | BitField<51, 2, VmadShr> shr; |
| 1167 | BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) | 1177 | BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) |
| 1168 | BitField<47, 1, u64> cc; | 1178 | BitField<47, 1, u64> cc; |
| @@ -1238,6 +1248,7 @@ public: | |||
| 1238 | OUT_R, // Emit vertex/primitive | 1248 | OUT_R, // Emit vertex/primitive |
| 1239 | ISBERD, | 1249 | ISBERD, |
| 1240 | VMAD, | 1250 | VMAD, |
| 1251 | VSETP, | ||
| 1241 | FFMA_IMM, // Fused Multiply and Add | 1252 | FFMA_IMM, // Fused Multiply and Add |
| 1242 | FFMA_CR, | 1253 | FFMA_CR, |
| 1243 | FFMA_RC, | 1254 | FFMA_RC, |
| @@ -1495,6 +1506,7 @@ private: | |||
| 1495 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | 1506 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), |
| 1496 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), | 1507 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), |
| 1497 | INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"), | 1508 | INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"), |
| 1509 | INST("0101000011110---", Id::VSETP, Type::Trivial, "VSETP"), | ||
| 1498 | INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), | 1510 | INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), |
| 1499 | INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), | 1511 | INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), |
| 1500 | INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), | 1512 | INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 7a019fc86..fe4d1bd83 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -1312,6 +1312,63 @@ private: | |||
| 1312 | } | 1312 | } |
| 1313 | } | 1313 | } |
| 1314 | 1314 | ||
| 1315 | /// Unpacks a video instruction operand (e.g. VMAD). | ||
| 1316 | std::string GetVideoOperand(const std::string& op, bool is_chunk, bool is_signed, | ||
| 1317 | Tegra::Shader::VideoType type, u64 byte_height) { | ||
| 1318 | const std::string value = [&]() { | ||
| 1319 | if (!is_chunk) { | ||
| 1320 | const auto offset = static_cast<u32>(byte_height * 8); | ||
| 1321 | return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)"; | ||
| 1322 | } | ||
| 1323 | const std::string zero = "0"; | ||
| 1324 | |||
| 1325 | switch (type) { | ||
| 1326 | case Tegra::Shader::VideoType::Size16_Low: | ||
| 1327 | return '(' + op + " & 0xffff)"; | ||
| 1328 | case Tegra::Shader::VideoType::Size16_High: | ||
| 1329 | return '(' + op + " >> 16)"; | ||
| 1330 | case Tegra::Shader::VideoType::Size32: | ||
| 1331 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when | ||
| 1332 | // this type is used (1 * 1 + 0 == 0x5b800000). Until a better | ||
| 1333 | // explanation is found: assert. | ||
| 1334 | UNIMPLEMENTED(); | ||
| 1335 | return zero; | ||
| 1336 | case Tegra::Shader::VideoType::Invalid: | ||
| 1337 | UNREACHABLE_MSG("Invalid instruction encoding"); | ||
| 1338 | return zero; | ||
| 1339 | default: | ||
| 1340 | UNREACHABLE(); | ||
| 1341 | return zero; | ||
| 1342 | } | ||
| 1343 | }(); | ||
| 1344 | |||
| 1345 | if (is_signed) { | ||
| 1346 | return "int(" + value + ')'; | ||
| 1347 | } | ||
| 1348 | return value; | ||
| 1349 | }; | ||
| 1350 | |||
| 1351 | /// Gets the A operand for a video instruction. | ||
| 1352 | std::string GetVideoOperandA(Instruction instr) { | ||
| 1353 | return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr8, 0, false), | ||
| 1354 | instr.video.is_byte_chunk_a != 0, instr.video.signed_a, | ||
| 1355 | instr.video.type_a, instr.video.byte_height_a); | ||
| 1356 | } | ||
| 1357 | |||
| 1358 | /// Gets the B operand for a video instruction. | ||
| 1359 | std::string GetVideoOperandB(Instruction instr) { | ||
| 1360 | if (instr.video.use_register_b) { | ||
| 1361 | return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 1362 | instr.video.is_byte_chunk_b != 0, instr.video.signed_b, | ||
| 1363 | instr.video.type_b, instr.video.byte_height_b); | ||
| 1364 | } else { | ||
| 1365 | return '(' + | ||
| 1366 | std::to_string(instr.video.signed_b ? static_cast<s16>(instr.alu.GetImm20_16()) | ||
| 1367 | : instr.alu.GetImm20_16()) + | ||
| 1368 | ')'; | ||
| 1369 | } | ||
| 1370 | } | ||
| 1371 | |||
| 1315 | /** | 1372 | /** |
| 1316 | * Compiles a single instruction from Tegra to GLSL. | 1373 | * Compiles a single instruction from Tegra to GLSL. |
| 1317 | * @param offset the offset of the Tegra shader instruction. | 1374 | * @param offset the offset of the Tegra shader instruction. |
| @@ -3321,87 +3378,51 @@ private: | |||
| 3321 | break; | 3378 | break; |
| 3322 | } | 3379 | } |
| 3323 | case OpCode::Id::VMAD: { | 3380 | case OpCode::Id::VMAD: { |
| 3324 | const bool signed_a = instr.vmad.signed_a == 1; | 3381 | const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; |
| 3325 | const bool signed_b = instr.vmad.signed_b == 1; | 3382 | const std::string op_a = GetVideoOperandA(instr); |
| 3326 | const bool result_signed = signed_a || signed_b; | 3383 | const std::string op_b = GetVideoOperandB(instr); |
| 3327 | boost::optional<std::string> forced_result; | ||
| 3328 | |||
| 3329 | auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed, | ||
| 3330 | Tegra::Shader::VmadType type, u64 byte_height) { | ||
| 3331 | const std::string value = [&]() { | ||
| 3332 | if (!is_chunk) { | ||
| 3333 | const auto shift = static_cast<u32>(byte_height * 8); | ||
| 3334 | return "((" + op + " >> " + std::to_string(shift) + ") & 0xff)"; | ||
| 3335 | } | ||
| 3336 | const std::string zero = "0"; | ||
| 3337 | |||
| 3338 | switch (type) { | ||
| 3339 | case Tegra::Shader::VmadType::Size16_Low: | ||
| 3340 | return '(' + op + " & 0xffff)"; | ||
| 3341 | case Tegra::Shader::VmadType::Size16_High: | ||
| 3342 | return '(' + op + " >> 16)"; | ||
| 3343 | case Tegra::Shader::VmadType::Size32: | ||
| 3344 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when | ||
| 3345 | // this type is used (1 * 1 + 0 == 0x5b800000). Until a better | ||
| 3346 | // explanation is found: assert. | ||
| 3347 | UNREACHABLE_MSG("Unimplemented"); | ||
| 3348 | return zero; | ||
| 3349 | case Tegra::Shader::VmadType::Invalid: | ||
| 3350 | // Note(Rodrigo): This flag is invalid according to nvdisasm. From my | ||
| 3351 | // testing (even though it's invalid) this makes the whole instruction | ||
| 3352 | // assign zero to target register. | ||
| 3353 | forced_result = boost::make_optional(zero); | ||
| 3354 | return zero; | ||
| 3355 | default: | ||
| 3356 | UNREACHABLE(); | ||
| 3357 | return zero; | ||
| 3358 | } | ||
| 3359 | }(); | ||
| 3360 | |||
| 3361 | if (is_signed) { | ||
| 3362 | return "int(" + value + ')'; | ||
| 3363 | } | ||
| 3364 | return value; | ||
| 3365 | }; | ||
| 3366 | |||
| 3367 | const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false), | ||
| 3368 | instr.vmad.is_byte_chunk_a != 0, signed_a, | ||
| 3369 | instr.vmad.type_a, instr.vmad.byte_height_a); | ||
| 3370 | |||
| 3371 | std::string op_b; | ||
| 3372 | if (instr.vmad.use_register_b) { | ||
| 3373 | op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 3374 | instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b, | ||
| 3375 | instr.vmad.byte_height_b); | ||
| 3376 | } else { | ||
| 3377 | op_b = '(' + | ||
| 3378 | std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16()) | ||
| 3379 | : instr.alu.GetImm20_16()) + | ||
| 3380 | ')'; | ||
| 3381 | } | ||
| 3382 | |||
| 3383 | const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed); | 3384 | const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed); |
| 3384 | 3385 | ||
| 3385 | std::string result; | 3386 | std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; |
| 3386 | if (forced_result) { | ||
| 3387 | result = *forced_result; | ||
| 3388 | } else { | ||
| 3389 | result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; | ||
| 3390 | 3387 | ||
| 3391 | switch (instr.vmad.shr) { | 3388 | switch (instr.vmad.shr) { |
| 3392 | case Tegra::Shader::VmadShr::Shr7: | 3389 | case Tegra::Shader::VmadShr::Shr7: |
| 3393 | result = '(' + result + " >> 7)"; | 3390 | result = '(' + result + " >> 7)"; |
| 3394 | break; | 3391 | break; |
| 3395 | case Tegra::Shader::VmadShr::Shr15: | 3392 | case Tegra::Shader::VmadShr::Shr15: |
| 3396 | result = '(' + result + " >> 15)"; | 3393 | result = '(' + result + " >> 15)"; |
| 3397 | break; | 3394 | break; |
| 3398 | } | ||
| 3399 | } | 3395 | } |
| 3396 | |||
| 3400 | regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, | 3397 | regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, |
| 3401 | instr.vmad.saturate == 1, 0, Register::Size::Word, | 3398 | instr.vmad.saturate == 1, 0, Register::Size::Word, |
| 3402 | instr.vmad.cc); | 3399 | instr.vmad.cc); |
| 3403 | break; | 3400 | break; |
| 3404 | } | 3401 | } |
| 3402 | case OpCode::Id::VSETP: { | ||
| 3403 | const std::string op_a = GetVideoOperandA(instr); | ||
| 3404 | const std::string op_b = GetVideoOperandB(instr); | ||
| 3405 | |||
| 3406 | // We can't use the constant predicate as destination. | ||
| 3407 | ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3408 | |||
| 3409 | const std::string second_pred = GetPredicateCondition(instr.vsetp.pred39, false); | ||
| 3410 | |||
| 3411 | const std::string combiner = GetPredicateCombiner(instr.vsetp.op); | ||
| 3412 | |||
| 3413 | const std::string predicate = GetPredicateComparison(instr.vsetp.cond, op_a, op_b); | ||
| 3414 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 3415 | SetPredicate(instr.vsetp.pred3, | ||
| 3416 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3417 | |||
| 3418 | if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3419 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 3420 | // if enabled | ||
| 3421 | SetPredicate(instr.vsetp.pred0, | ||
| 3422 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3423 | } | ||
| 3424 | break; | ||
| 3425 | } | ||
| 3405 | default: { | 3426 | default: { |
| 3406 | LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); | 3427 | LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); |
| 3407 | UNREACHABLE(); | 3428 | UNREACHABLE(); |