diff options
| author | 2018-10-26 00:16:25 -0400 | |
|---|---|---|
| committer | 2018-10-26 00:16:25 -0400 | |
| commit | d278f25bda79a2d2ed560a68f3983881c1a1e501 (patch) | |
| tree | 1a24fe68cd4d72364eb6f0b6b6f5b420597a0f55 /src | |
| parent | Merge pull request #1430 from DarkLordZach/remove-promote-dir (diff) | |
| parent | Implemented LD_L and ST_L (diff) | |
| download | yuzu-d278f25bda79a2d2ed560a68f3983881c1a1e501.tar.gz yuzu-d278f25bda79a2d2ed560a68f3983881c1a1e501.tar.xz yuzu-d278f25bda79a2d2ed560a68f3983881c1a1e501.zip | |
Merge pull request #1533 from FernandoS27/lmem
Implemented Shader Local Memory
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 31 | ||||
| -rw-r--r-- | src/video_core/engines/shader_header.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 103 |
3 files changed, 138 insertions, 1 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index af7756266..141b9159b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -208,6 +208,16 @@ enum class UniformType : u64 { | |||
| 208 | Double = 5, | 208 | Double = 5, |
| 209 | }; | 209 | }; |
| 210 | 210 | ||
| 211 | enum class StoreType : u64 { | ||
| 212 | Unsigned8 = 0, | ||
| 213 | Signed8 = 1, | ||
| 214 | Unsigned16 = 2, | ||
| 215 | Signed16 = 3, | ||
| 216 | Bytes32 = 4, | ||
| 217 | Bytes64 = 5, | ||
| 218 | Bytes128 = 6, | ||
| 219 | }; | ||
| 220 | |||
| 211 | enum class IMinMaxExchange : u64 { | 221 | enum class IMinMaxExchange : u64 { |
| 212 | None = 0, | 222 | None = 0, |
| 213 | XLo = 1, | 223 | XLo = 1, |
| @@ -748,6 +758,18 @@ union Instruction { | |||
| 748 | } ld_c; | 758 | } ld_c; |
| 749 | 759 | ||
| 750 | union { | 760 | union { |
| 761 | BitField<48, 3, StoreType> type; | ||
| 762 | } ldst_sl; | ||
| 763 | |||
| 764 | union { | ||
| 765 | BitField<44, 2, u64> unknown; | ||
| 766 | } ld_l; | ||
| 767 | |||
| 768 | union { | ||
| 769 | BitField<44, 2, u64> unknown; | ||
| 770 | } st_l; | ||
| 771 | |||
| 772 | union { | ||
| 751 | BitField<0, 3, u64> pred0; | 773 | BitField<0, 3, u64> pred0; |
| 752 | BitField<3, 3, u64> pred3; | 774 | BitField<3, 3, u64> pred3; |
| 753 | BitField<7, 1, u64> abs_a; | 775 | BitField<7, 1, u64> abs_a; |
| @@ -1209,6 +1231,7 @@ union Instruction { | |||
| 1209 | BitField<61, 1, u64> is_b_imm; | 1231 | BitField<61, 1, u64> is_b_imm; |
| 1210 | BitField<60, 1, u64> is_b_gpr; | 1232 | BitField<60, 1, u64> is_b_gpr; |
| 1211 | BitField<59, 1, u64> is_c_gpr; | 1233 | BitField<59, 1, u64> is_c_gpr; |
| 1234 | BitField<20, 24, s64> smem_imm; | ||
| 1212 | 1235 | ||
| 1213 | Attribute attribute; | 1236 | Attribute attribute; |
| 1214 | Sampler sampler; | 1237 | Sampler sampler; |
| @@ -1232,8 +1255,12 @@ public: | |||
| 1232 | BRA, | 1255 | BRA, |
| 1233 | PBK, | 1256 | PBK, |
| 1234 | LD_A, | 1257 | LD_A, |
| 1258 | LD_L, | ||
| 1259 | LD_S, | ||
| 1235 | LD_C, | 1260 | LD_C, |
| 1236 | ST_A, | 1261 | ST_A, |
| 1262 | ST_L, | ||
| 1263 | ST_S, | ||
| 1237 | LDG, // Load from global memory | 1264 | LDG, // Load from global memory |
| 1238 | STG, // Store in global memory | 1265 | STG, // Store in global memory |
| 1239 | TEX, | 1266 | TEX, |
| @@ -1490,8 +1517,12 @@ private: | |||
| 1490 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), | 1517 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), |
| 1491 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), | 1518 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), |
| 1492 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), | 1519 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), |
| 1520 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), | ||
| 1521 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), | ||
| 1493 | INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), | 1522 | INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), |
| 1494 | INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), | 1523 | INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), |
| 1524 | INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), | ||
| 1525 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), | ||
| 1495 | INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), | 1526 | INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), |
| 1496 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), | 1527 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), |
| 1497 | INST("110000----111---", Id::TEX, Type::Memory, "TEX"), | 1528 | INST("110000----111---", Id::TEX, Type::Memory, "TEX"), |
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index a885ee3cf..a0e015c4b 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h | |||
| @@ -96,6 +96,11 @@ struct Header { | |||
| 96 | } | 96 | } |
| 97 | } ps; | 97 | } ps; |
| 98 | }; | 98 | }; |
| 99 | |||
| 100 | u64 GetLocalMemorySize() { | ||
| 101 | return (common1.shader_local_memory_low_size | | ||
| 102 | (common2.shader_local_memory_high_size << 24)); | ||
| 103 | } | ||
| 99 | }; | 104 | }; |
| 100 | 105 | ||
| 101 | static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); | 106 | static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 81ffb24e4..dec291a7d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -278,7 +278,7 @@ public: | |||
| 278 | const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix, | 278 | const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix, |
| 279 | const Tegra::Shader::Header& header) | 279 | const Tegra::Shader::Header& header) |
| 280 | : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header}, | 280 | : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header}, |
| 281 | fixed_pipeline_output_attributes_used{} { | 281 | fixed_pipeline_output_attributes_used{}, local_memory_size{0} { |
| 282 | BuildRegisterList(); | 282 | BuildRegisterList(); |
| 283 | BuildInputList(); | 283 | BuildInputList(); |
| 284 | } | 284 | } |
| @@ -436,6 +436,25 @@ public: | |||
| 436 | shader.AddLine(dest + " = " + src + ';'); | 436 | shader.AddLine(dest + " = " + src + ';'); |
| 437 | } | 437 | } |
| 438 | 438 | ||
/// Builds the GLSL expression that reads one element of the `lmem` array.
/// @param index GLSL expression used as the array subscript.
/// @returns The string `lmem[<index>]`.
std::string GetLocalMemoryAsFloat(const std::string& index) {
    std::string expression{"lmem["};
    expression += index;
    expression += ']';
    return expression;
}
| 442 | |||
/// Builds the GLSL expression that reads one element of the `lmem` array and
/// reinterprets its bits as an integer.
/// @param index GLSL expression used as the array subscript.
/// @param is_signed Selects the signed (`floatBitsToInt`) or unsigned
///                  (`floatBitsToUint`) bit-cast.
/// @returns The string `<bitcast>(lmem[<index>])`.
std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) {
    // GLSL spells the signed bit-cast `floatBitsToInt`; `floatToIntBits` is not
    // a GLSL builtin and would make the generated shader fail to compile.
    const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"};
    return func + "(lmem[" + index + "])";
}
| 447 | |||
| 448 | void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) { | ||
| 449 | shader.AddLine("lmem[" + index + "] = " + value + ';'); | ||
| 450 | } | ||
| 451 | |||
| 452 | void SetLocalMemoryAsInteger(const std::string& index, const std::string& value, | ||
| 453 | bool is_signed = false) { | ||
| 454 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | ||
| 455 | shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");"); | ||
| 456 | } | ||
| 457 | |||
| 439 | std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { | 458 | std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { |
| 440 | switch (cc) { | 459 | switch (cc) { |
| 441 | case Tegra::Shader::ControlCode::NEU: | 460 | case Tegra::Shader::ControlCode::NEU: |
| @@ -533,6 +552,7 @@ public: | |||
| 533 | void GenerateDeclarations(const std::string& suffix) { | 552 | void GenerateDeclarations(const std::string& suffix) { |
| 534 | GenerateVertex(); | 553 | GenerateVertex(); |
| 535 | GenerateRegisters(suffix); | 554 | GenerateRegisters(suffix); |
| 555 | GenerateLocalMemory(); | ||
| 536 | GenerateInternalFlags(); | 556 | GenerateInternalFlags(); |
| 537 | GenerateInputAttrs(); | 557 | GenerateInputAttrs(); |
| 538 | GenerateOutputAttrs(); | 558 | GenerateOutputAttrs(); |
| @@ -578,6 +598,10 @@ public: | |||
| 578 | return entry.GetName(); | 598 | return entry.GetName(); |
| 579 | } | 599 | } |
| 580 | 600 | ||
| 601 | void SetLocalMemory(u64 lmem) { | ||
| 602 | local_memory_size = lmem; | ||
| 603 | } | ||
| 604 | |||
| 581 | private: | 605 | private: |
| 582 | /// Generates declarations for registers. | 606 | /// Generates declarations for registers. |
| 583 | void GenerateRegisters(const std::string& suffix) { | 607 | void GenerateRegisters(const std::string& suffix) { |
| @@ -588,6 +612,15 @@ private: | |||
| 588 | declarations.AddNewLine(); | 612 | declarations.AddNewLine(); |
| 589 | } | 613 | } |
| 590 | 614 | ||
| 615 | /// Generates declarations for local memory. | ||
| 616 | void GenerateLocalMemory() { | ||
| 617 | if (local_memory_size > 0) { | ||
| 618 | declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + | ||
| 619 | "];"); | ||
| 620 | declarations.AddNewLine(); | ||
| 621 | } | ||
| 622 | } | ||
| 623 | |||
| 591 | /// Generates declarations for internal flags. | 624 | /// Generates declarations for internal flags. |
| 592 | void GenerateInternalFlags() { | 625 | void GenerateInternalFlags() { |
| 593 | for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) { | 626 | for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) { |
| @@ -895,6 +928,7 @@ private: | |||
| 895 | const std::string& suffix; | 928 | const std::string& suffix; |
| 896 | const Tegra::Shader::Header& header; | 929 | const Tegra::Shader::Header& header; |
| 897 | std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used; | 930 | std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used; |
| 931 | u64 local_memory_size; | ||
| 898 | }; | 932 | }; |
| 899 | 933 | ||
| 900 | class GLSLGenerator { | 934 | class GLSLGenerator { |
| @@ -904,6 +938,8 @@ public: | |||
| 904 | : subroutines(subroutines), program_code(program_code), main_offset(main_offset), | 938 | : subroutines(subroutines), program_code(program_code), main_offset(main_offset), |
| 905 | stage(stage), suffix(suffix) { | 939 | stage(stage), suffix(suffix) { |
| 906 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 940 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 941 | local_memory_size = header.GetLocalMemorySize(); | ||
| 942 | regs.SetLocalMemory(local_memory_size); | ||
| 907 | Generate(suffix); | 943 | Generate(suffix); |
| 908 | } | 944 | } |
| 909 | 945 | ||
| @@ -2324,6 +2360,39 @@ private: | |||
| 2324 | shader.AddLine("}"); | 2360 | shader.AddLine("}"); |
| 2325 | break; | 2361 | break; |
| 2326 | } | 2362 | } |
| 2363 | case OpCode::Id::LD_L: { | ||
| 2364 | // Add an extra scope and declare the index register inside to prevent | ||
| 2365 | // overwriting it in case it is used as an output of the LD instruction. | ||
| 2366 | shader.AddLine('{'); | ||
| 2367 | ++shader.scope; | ||
| 2368 | |||
| 2369 | std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + | ||
| 2370 | std::to_string(instr.smem_imm.Value()) + ')'; | ||
| 2371 | |||
| 2372 | shader.AddLine("uint index = (" + op + " / 4);"); | ||
| 2373 | |||
| 2374 | const std::string op_a = regs.GetLocalMemoryAsFloat("index"); | ||
| 2375 | |||
| 2376 | if (instr.ld_l.unknown != 1) { | ||
| 2377 | LOG_CRITICAL(HW_GPU, "LD_L Unhandled mode: {}", | ||
| 2378 | static_cast<unsigned>(instr.ld_l.unknown.Value())); | ||
| 2379 | UNREACHABLE(); | ||
| 2380 | } | ||
| 2381 | |||
| 2382 | switch (instr.ldst_sl.type.Value()) { | ||
| 2383 | case Tegra::Shader::StoreType::Bytes32: | ||
| 2384 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | ||
| 2385 | break; | ||
| 2386 | default: | ||
| 2387 | LOG_CRITICAL(HW_GPU, "LD_L Unhandled type: {}", | ||
| 2388 | static_cast<unsigned>(instr.ldst_sl.type.Value())); | ||
| 2389 | UNREACHABLE(); | ||
| 2390 | } | ||
| 2391 | |||
| 2392 | --shader.scope; | ||
| 2393 | shader.AddLine('}'); | ||
| 2394 | break; | ||
| 2395 | } | ||
| 2327 | case OpCode::Id::ST_A: { | 2396 | case OpCode::Id::ST_A: { |
| 2328 | ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex, | 2397 | ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex, |
| 2329 | "Indirect attribute loads are not supported"); | 2398 | "Indirect attribute loads are not supported"); |
| @@ -2352,6 +2421,37 @@ private: | |||
| 2352 | 2421 | ||
| 2353 | break; | 2422 | break; |
| 2354 | } | 2423 | } |
| 2424 | case OpCode::Id::ST_L: { | ||
| 2425 | // Add an extra scope and declare the index register inside to prevent | ||
| 2426 | // overwriting it in case it is used as an output of the LD instruction. | ||
| 2427 | shader.AddLine('{'); | ||
| 2428 | ++shader.scope; | ||
| 2429 | |||
| 2430 | std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + | ||
| 2431 | std::to_string(instr.smem_imm.Value()) + ')'; | ||
| 2432 | |||
| 2433 | shader.AddLine("uint index = (" + op + " / 4);"); | ||
| 2434 | |||
| 2435 | if (instr.st_l.unknown != 0) { | ||
| 2436 | LOG_CRITICAL(HW_GPU, "ST_L Unhandled mode: {}", | ||
| 2437 | static_cast<unsigned>(instr.st_l.unknown.Value())); | ||
| 2438 | UNREACHABLE(); | ||
| 2439 | } | ||
| 2440 | |||
| 2441 | switch (instr.ldst_sl.type.Value()) { | ||
| 2442 | case Tegra::Shader::StoreType::Bytes32: | ||
| 2443 | regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0)); | ||
| 2444 | break; | ||
| 2445 | default: | ||
| 2446 | LOG_CRITICAL(HW_GPU, "ST_L Unhandled type: {}", | ||
| 2447 | static_cast<unsigned>(instr.ldst_sl.type.Value())); | ||
| 2448 | UNREACHABLE(); | ||
| 2449 | } | ||
| 2450 | |||
| 2451 | --shader.scope; | ||
| 2452 | shader.AddLine('}'); | ||
| 2453 | break; | ||
| 2454 | } | ||
| 2355 | case OpCode::Id::TEX: { | 2455 | case OpCode::Id::TEX: { |
| 2356 | Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; | 2456 | Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; |
| 2357 | std::string coord; | 2457 | std::string coord; |
| @@ -3575,6 +3675,7 @@ private: | |||
| 3575 | const u32 main_offset; | 3675 | const u32 main_offset; |
| 3576 | Maxwell3D::Regs::ShaderStage stage; | 3676 | Maxwell3D::Regs::ShaderStage stage; |
| 3577 | const std::string& suffix; | 3677 | const std::string& suffix; |
| 3678 | u64 local_memory_size; | ||
| 3578 | 3679 | ||
| 3579 | ShaderWriter shader; | 3680 | ShaderWriter shader; |
| 3580 | ShaderWriter declarations; | 3681 | ShaderWriter declarations; |