summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar bunnei2018-10-26 00:16:25 -0400
committerGravatar GitHub2018-10-26 00:16:25 -0400
commitd278f25bda79a2d2ed560a68f3983881c1a1e501 (patch)
tree1a24fe68cd4d72364eb6f0b6b6f5b420597a0f55
parentMerge pull request #1430 from DarkLordZach/remove-promote-dir (diff)
parentImplemented LD_L and ST_L (diff)
downloadyuzu-d278f25bda79a2d2ed560a68f3983881c1a1e501.tar.gz
yuzu-d278f25bda79a2d2ed560a68f3983881c1a1e501.tar.xz
yuzu-d278f25bda79a2d2ed560a68f3983881c1a1e501.zip
Merge pull request #1533 from FernandoS27/lmem
Implemented Shader Local Memory
Diffstat (limited to '')
-rw-r--r--src/video_core/engines/shader_bytecode.h31
-rw-r--r--src/video_core/engines/shader_header.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp103
3 files changed, 138 insertions, 1 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index af7756266..141b9159b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -208,6 +208,16 @@ enum class UniformType : u64 {
208 Double = 5, 208 Double = 5,
209}; 209};
210 210
/// Access type selector for local/shared memory loads and stores; it is decoded through the
/// `ldst_sl.type` bit field of an instruction.
/// NOTE(review): the decompiler's LD_L/ST_L handling moves exactly one float for Bytes32, so the
/// "BytesNN" names look like bit widths rather than byte counts - confirm before relying on them.
211enum class StoreType : u64 {
212 Unsigned8 = 0,
213 Signed8 = 1,
214 Unsigned16 = 2,
215 Signed16 = 3,
216 Bytes32 = 4,
217 Bytes64 = 5,
218 Bytes128 = 6,
219};
220
211enum class IMinMaxExchange : u64 { 221enum class IMinMaxExchange : u64 {
212 None = 0, 222 None = 0,
213 XLo = 1, 223 XLo = 1,
@@ -748,6 +758,18 @@ union Instruction {
748 } ld_c; 758 } ld_c;
749 759
750 union { 760 union {
761 BitField<48, 3, StoreType> type;
762 } ldst_sl;
763
// LD_L-specific bits. The meaning of this field is not known; the GLSL decompiler only
// accepts the value 1 and reports any other value as an unhandled mode.
764 union {
765 BitField<44, 2, u64> unknown;
766 } ld_l;
767
// ST_L-specific bits. The meaning of this field is not known; the GLSL decompiler only
// accepts the value 0 and reports any other value as an unhandled mode.
768 union {
769 BitField<44, 2, u64> unknown;
770 } st_l;
771
772 union {
751 BitField<0, 3, u64> pred0; 773 BitField<0, 3, u64> pred0;
752 BitField<3, 3, u64> pred3; 774 BitField<3, 3, u64> pred3;
753 BitField<7, 1, u64> abs_a; 775 BitField<7, 1, u64> abs_a;
@@ -1209,6 +1231,7 @@ union Instruction {
1209 BitField<61, 1, u64> is_b_imm; 1231 BitField<61, 1, u64> is_b_imm;
1210 BitField<60, 1, u64> is_b_gpr; 1232 BitField<60, 1, u64> is_b_gpr;
1211 BitField<59, 1, u64> is_c_gpr; 1233 BitField<59, 1, u64> is_c_gpr;
1234 BitField<20, 24, s64> smem_imm;
1212 1235
1213 Attribute attribute; 1236 Attribute attribute;
1214 Sampler sampler; 1237 Sampler sampler;
@@ -1232,8 +1255,12 @@ public:
1232 BRA, 1255 BRA,
1233 PBK, 1256 PBK,
1234 LD_A, 1257 LD_A,
1258 LD_L,
1259 LD_S,
1235 LD_C, 1260 LD_C,
1236 ST_A, 1261 ST_A,
1262 ST_L,
1263 ST_S,
1237 LDG, // Load from global memory 1264 LDG, // Load from global memory
1238 STG, // Store in global memory 1265 STG, // Store in global memory
1239 TEX, 1266 TEX,
@@ -1490,8 +1517,12 @@ private:
1490 INST("111000110100---", Id::BRK, Type::Flow, "BRK"), 1517 INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
1491 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 1518 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
1492 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 1519 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
1520 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
1521 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
1493 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), 1522 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
1494 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), 1523 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
1524 INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
1525 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1495 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), 1526 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
1496 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 1527 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
1497 INST("110000----111---", Id::TEX, Type::Memory, "TEX"), 1528 INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index a885ee3cf..a0e015c4b 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -96,6 +96,11 @@ struct Header {
96 } 96 }
97 } ps; 97 } ps;
98 }; 98 };
99
/// Returns the shader local memory size assembled from the two header fields: the low part
/// from common1 OR'ed with the high part from common2 shifted left by 24.
/// NOTE(review): the unit is presumably bytes (the decompiler divides the result by 4 to size
/// a float array) - confirm. The field types are declared outside this view, so also confirm
/// that the shift by 24 cannot drop bits before the result is widened to u64.
100 u64 GetLocalMemorySize() {
101 return (common1.shader_local_memory_low_size |
102 (common2.shader_local_memory_high_size << 24));
103 }
99}; 104};
100 105
101static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); 106static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 81ffb24e4..dec291a7d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -278,7 +278,7 @@ public:
278 const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix, 278 const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix,
279 const Tegra::Shader::Header& header) 279 const Tegra::Shader::Header& header)
280 : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header}, 280 : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header},
281 fixed_pipeline_output_attributes_used{} { 281 fixed_pipeline_output_attributes_used{}, local_memory_size{0} {
282 BuildRegisterList(); 282 BuildRegisterList();
283 BuildInputList(); 283 BuildInputList();
284 } 284 }
@@ -436,6 +436,25 @@ public:
436 shader.AddLine(dest + " = " + src + ';'); 436 shader.AddLine(dest + " = " + src + ';');
437 } 437 }
438 438
/// Builds the GLSL expression that reads one element of the `lmem` array as a float.
/// @param index GLSL expression used verbatim as the array subscript.
/// @returns The string `lmem[<index>]`.
439 std::string GetLocalMemoryAsFloat(const std::string& index) {
440 return "lmem[" + index + ']';
441 }
442
/// Builds the GLSL expression that reads one element of the `lmem` float array and
/// reinterprets its bits as an integer.
/// @param index GLSL expression used verbatim as the array subscript.
/// @param is_signed When true the bits are reinterpreted as `int`, otherwise as `uint`.
/// @returns `floatBitsToInt(lmem[<index>])` or `floatBitsToUint(lmem[<index>])`.
std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) {
    // The GLSL built-ins are floatBitsToInt / floatBitsToUint. The previously emitted
    // "floatToIntBits" is not a GLSL function and made the generated shader fail to compile.
    const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"};
    return func + "(lmem[" + index + "])";
}
447
/// Emits a GLSL statement into the shader body that assigns a float value to one element of
/// the `lmem` array.
/// @param index GLSL expression used verbatim as the array subscript.
/// @param value GLSL expression written verbatim as the right-hand side of the assignment.
448 void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) {
449 shader.AddLine("lmem[" + index + "] = " + value + ';');
450 }
451
/// Emits a GLSL statement into the shader body that stores an integer value into one element
/// of the `lmem` float array by wrapping the value in a bit-cast call.
/// @param index GLSL expression used verbatim as the array subscript.
/// @param value GLSL integer expression to store.
/// @param is_signed Selects `intBitsToFloat` when true and `uintBitsToFloat` otherwise.
452 void SetLocalMemoryAsInteger(const std::string& index, const std::string& value,
453 bool is_signed = false) {
454 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
455 shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");");
456 }
457
439 std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { 458 std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {
440 switch (cc) { 459 switch (cc) {
441 case Tegra::Shader::ControlCode::NEU: 460 case Tegra::Shader::ControlCode::NEU:
@@ -533,6 +552,7 @@ public:
533 void GenerateDeclarations(const std::string& suffix) { 552 void GenerateDeclarations(const std::string& suffix) {
534 GenerateVertex(); 553 GenerateVertex();
535 GenerateRegisters(suffix); 554 GenerateRegisters(suffix);
555 GenerateLocalMemory();
536 GenerateInternalFlags(); 556 GenerateInternalFlags();
537 GenerateInputAttrs(); 557 GenerateInputAttrs();
538 GenerateOutputAttrs(); 558 GenerateOutputAttrs();
@@ -578,6 +598,10 @@ public:
578 return entry.GetName(); 598 return entry.GetName();
579 } 599 }
580 600
/// Records the local memory size that GenerateLocalMemory() later uses to size the `lmem`
/// declaration. A value of 0 means no `lmem` array is declared.
/// @param lmem Local memory size; GLSLGenerator passes the value of
/// Header::GetLocalMemorySize() here.
601 void SetLocalMemory(u64 lmem) {
602 local_memory_size = lmem;
603 }
604
581private: 605private:
582 /// Generates declarations for registers. 606 /// Generates declarations for registers.
583 void GenerateRegisters(const std::string& suffix) { 607 void GenerateRegisters(const std::string& suffix) {
@@ -588,6 +612,15 @@ private:
588 declarations.AddNewLine(); 612 declarations.AddNewLine();
589 } 613 }
590 614
615 /// Generates declarations for local memory.
/// Emits `float lmem[N];` followed by a blank line, where N is local_memory_size divided by 4
/// and rounded up. Nothing is emitted when local_memory_size is 0.
616 void GenerateLocalMemory() {
617 if (local_memory_size > 0) {
// (size - 1 + 4) / 4 is the round-up division of the size by 4, the divisor LD_L/ST_L also
// use to turn an address into an lmem element index.
618 declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) +
619 "];");
620 declarations.AddNewLine();
621 }
622 }
623
591 /// Generates declarations for internal flags. 624 /// Generates declarations for internal flags.
592 void GenerateInternalFlags() { 625 void GenerateInternalFlags() {
593 for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) { 626 for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
@@ -895,6 +928,7 @@ private:
895 const std::string& suffix; 928 const std::string& suffix;
896 const Tegra::Shader::Header& header; 929 const Tegra::Shader::Header& header;
897 std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used; 930 std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used;
931 u64 local_memory_size;
898}; 932};
899 933
900class GLSLGenerator { 934class GLSLGenerator {
@@ -904,6 +938,8 @@ public:
904 : subroutines(subroutines), program_code(program_code), main_offset(main_offset), 938 : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
905 stage(stage), suffix(suffix) { 939 stage(stage), suffix(suffix) {
906 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 940 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
941 local_memory_size = header.GetLocalMemorySize();
942 regs.SetLocalMemory(local_memory_size);
907 Generate(suffix); 943 Generate(suffix);
908 } 944 }
909 945
@@ -2324,6 +2360,39 @@ private:
2324 shader.AddLine("}"); 2360 shader.AddLine("}");
2325 break; 2361 break;
2326 } 2362 }
// LD_L: load from shader local memory into a register. Only mode 1 with the Bytes32 type is
// handled; any other combination is logged as critical and hits UNREACHABLE().
2363 case OpCode::Id::LD_L: {
2364 // Add an extra scope and declare the index register inside to prevent
2365 // overwriting it in case it is used as an output of the LD instruction.
2366 shader.AddLine('{');
2367 ++shader.scope;
2368
// Address expression: the integer value of gpr8 plus the immediate smem_imm.
2369 std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
2370 std::to_string(instr.smem_imm.Value()) + ')';
2371
// The address is divided by 4 to obtain the element index into the `lmem` float array.
2372 shader.AddLine("uint index = (" + op + " / 4);");
2373
// GLSL expression `lmem[index]`, referring to the variable emitted just above.
2374 const std::string op_a = regs.GetLocalMemoryAsFloat("index");
2375
// The meaning of this mode field is unknown; only the value 1 is accepted.
2376 if (instr.ld_l.unknown != 1) {
2377 LOG_CRITICAL(HW_GPU, "LD_L Unhandled mode: {}",
2378 static_cast<unsigned>(instr.ld_l.unknown.Value()));
2379 UNREACHABLE();
2380 }
2381
2382 switch (instr.ldst_sl.type.Value()) {
2383 case Tegra::Shader::StoreType::Bytes32:
// Copy the local memory element into gpr0 as a float.
2384 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
2385 break;
2386 default:
2387 LOG_CRITICAL(HW_GPU, "LD_L Unhandled type: {}",
2388 static_cast<unsigned>(instr.ldst_sl.type.Value()));
2389 UNREACHABLE();
2390 }
2391
// Close the scope opened at the top of this case.
2392 --shader.scope;
2393 shader.AddLine('}');
2394 break;
2395 }
2327 case OpCode::Id::ST_A: { 2396 case OpCode::Id::ST_A: {
2328 ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex, 2397 ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
2329 "Indirect attribute loads are not supported"); 2398 "Indirect attribute loads are not supported");
@@ -2352,6 +2421,37 @@ private:
2352 2421
2353 break; 2422 break;
2354 } 2423 }
// ST_L: store a register into shader local memory. Only mode 0 with the Bytes32 type is
// handled; any other combination is logged as critical and hits UNREACHABLE().
2424 case OpCode::Id::ST_L: {
2425 // Add an extra scope so that the temporary `index` variable declared below stays
2426 // local to this store (LD_L emits a variable with the same name in its own scope).
2427 shader.AddLine('{');
2428 ++shader.scope;
2429
// Address expression: the integer value of gpr8 plus the immediate smem_imm.
2430 std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
2431 std::to_string(instr.smem_imm.Value()) + ')';
2432
// The address is divided by 4 to obtain the element index into the `lmem` float array.
2433 shader.AddLine("uint index = (" + op + " / 4);");
2434
// The meaning of this mode field is unknown; only the value 0 is accepted.
2435 if (instr.st_l.unknown != 0) {
2436 LOG_CRITICAL(HW_GPU, "ST_L Unhandled mode: {}",
2437 static_cast<unsigned>(instr.st_l.unknown.Value()));
2438 UNREACHABLE();
2439 }
2440
2441 switch (instr.ldst_sl.type.Value()) {
2442 case Tegra::Shader::StoreType::Bytes32:
// Write the float value of gpr0 into lmem[index].
2443 regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0));
2444 break;
2445 default:
2446 LOG_CRITICAL(HW_GPU, "ST_L Unhandled type: {}",
2447 static_cast<unsigned>(instr.ldst_sl.type.Value()));
2448 UNREACHABLE();
2449 }
2450
// Close the scope opened at the top of this case.
2451 --shader.scope;
2452 shader.AddLine('}');
2453 break;
2454 }
2355 case OpCode::Id::TEX: { 2455 case OpCode::Id::TEX: {
2356 Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; 2456 Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
2357 std::string coord; 2457 std::string coord;
@@ -3575,6 +3675,7 @@ private:
3575 const u32 main_offset; 3675 const u32 main_offset;
3576 Maxwell3D::Regs::ShaderStage stage; 3676 Maxwell3D::Regs::ShaderStage stage;
3577 const std::string& suffix; 3677 const std::string& suffix;
3678 u64 local_memory_size;
3578 3679
3579 ShaderWriter shader; 3680 ShaderWriter shader;
3580 ShaderWriter declarations; 3681 ShaderWriter declarations;