summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2018-06-06 18:56:55 -0400
committer: GitHub, 2018-06-06 18:56:55 -0400
commit: cfc9effa6c8d6b93b4fee6266c6524cacb86755b (patch)
tree: 3e6968e43a2377a0bfd8bb45f3ea8141f9bd0568 /src
parent: nvdrv/devices/nvidia_ctrl_gpu : add IoctlCommands with their params (#524) (diff)
parent: gl_shader_decompiler: Implement LD_C instruction. (diff)
download: yuzu-cfc9effa6c8d6b93b4fee6266c6524cacb86755b.tar.gz
yuzu-cfc9effa6c8d6b93b4fee6266c6524cacb86755b.tar.xz
yuzu-cfc9effa6c8d6b93b4fee6266c6524cacb86755b.zip
Merge pull request #532 from bunnei/ld_c
gl_shader_decompiler: Implement LD_C instruction.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/engines/shader_bytecode.h 32
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 11
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 77
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.h 18
4 files changed, 110 insertions, 28 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 4eb507325..af18c2d81 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -109,11 +109,6 @@ union Sampler {
109 u64 value{}; 109 u64 value{};
110}; 110};
111 111
112union Uniform {
113 BitField<20, 14, u64> offset;
114 BitField<34, 5, u64> index;
115};
116
117} // namespace Shader 112} // namespace Shader
118} // namespace Tegra 113} // namespace Tegra
119 114
@@ -180,6 +175,15 @@ enum class FloatRoundingOp : u64 {
180 Trunc = 3, 175 Trunc = 3,
181}; 176};
182 177
178enum class UniformType : u64 {
179 UnsignedByte = 0,
180 SignedByte = 1,
181 UnsignedShort = 2,
182 SignedShort = 3,
183 Single = 4,
184 Double = 5,
185};
186
183union Instruction { 187union Instruction {
184 Instruction& operator=(const Instruction& instr) { 188 Instruction& operator=(const Instruction& instr) {
185 value = instr.value; 189 value = instr.value;
@@ -258,6 +262,11 @@ union Instruction {
258 } ffma; 262 } ffma;
259 263
260 union { 264 union {
265 BitField<48, 3, UniformType> type;
266 BitField<44, 2, u64> unknown;
267 } ld_c;
268
269 union {
261 BitField<0, 3, u64> pred0; 270 BitField<0, 3, u64> pred0;
262 BitField<3, 3, u64> pred3; 271 BitField<3, 3, u64> pred3;
263 BitField<7, 1, u64> abs_a; 272 BitField<7, 1, u64> abs_a;
@@ -354,12 +363,21 @@ union Instruction {
354 } 363 }
355 } bra; 364 } bra;
356 365
366 union {
367 BitField<20, 14, u64> offset;
368 BitField<34, 5, u64> index;
369 } cbuf34;
370
371 union {
372 BitField<20, 16, s64> offset;
373 BitField<36, 5, u64> index;
374 } cbuf36;
375
357 BitField<61, 1, u64> is_b_imm; 376 BitField<61, 1, u64> is_b_imm;
358 BitField<60, 1, u64> is_b_gpr; 377 BitField<60, 1, u64> is_b_gpr;
359 BitField<59, 1, u64> is_c_gpr; 378 BitField<59, 1, u64> is_c_gpr;
360 379
361 Attribute attribute; 380 Attribute attribute;
362 Uniform uniform;
363 Sampler sampler; 381 Sampler sampler;
364 382
365 u64 value; 383 u64 value;
@@ -374,6 +392,7 @@ public:
374 KIL, 392 KIL,
375 BRA, 393 BRA,
376 LD_A, 394 LD_A,
395 LD_C,
377 ST_A, 396 ST_A,
378 TEX, 397 TEX,
379 TEXQ, // Texture Query 398 TEXQ, // Texture Query
@@ -548,6 +567,7 @@ private:
548 INST("111000110011----", Id::KIL, Type::Flow, "KIL"), 567 INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
549 INST("111000100100----", Id::BRA, Type::Flow, "BRA"), 568 INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
550 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 569 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
570 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
551 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), 571 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
552 INST("1100000000111---", Id::TEX, Type::Memory, "TEX"), 572 INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
553 INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), 573 INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0a33868b7..30be38dd4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -654,7 +654,16 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
654 buffer_draw_state.bindpoint = current_bindpoint + bindpoint; 654 buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
655 655
656 boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); 656 boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
657 std::vector<u8> data(used_buffer.GetSize() * sizeof(float)); 657
658 std::vector<u8> data;
659 if (used_buffer.IsIndirect()) {
660 // Buffer is accessed indirectly, so upload the entire thing
661 data.resize(buffer.size * sizeof(float));
662 } else {
663 // Buffer is accessed directly, upload just what we use
664 data.resize(used_buffer.GetSize() * sizeof(float));
665 }
666
658 Memory::ReadBlock(*addr, data.data(), data.size()); 667 Memory::ReadBlock(*addr, data.data(), data.size());
659 668
660 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); 669 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 7a59ecccf..a703b9151 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -20,7 +20,6 @@ using Tegra::Shader::OpCode;
20using Tegra::Shader::Register; 20using Tegra::Shader::Register;
21using Tegra::Shader::Sampler; 21using Tegra::Shader::Sampler;
22using Tegra::Shader::SubOp; 22using Tegra::Shader::SubOp;
23using Tegra::Shader::Uniform;
24 23
25constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; 24constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
26 25
@@ -365,11 +364,9 @@ public:
365 } 364 }
366 365
367 /// Generates code representing a uniform (C buffer) register, interpreted as the input type. 366 /// Generates code representing a uniform (C buffer) register, interpreted as the input type.
368 std::string GetUniform(const Uniform& uniform, GLSLRegister::Type type) { 367 std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) {
369 declr_const_buffers[uniform.index].MarkAsUsed(static_cast<unsigned>(uniform.index), 368 declr_const_buffers[index].MarkAsUsed(index, offset, stage);
370 static_cast<unsigned>(uniform.offset), stage); 369 std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']';
371 std::string value =
372 'c' + std::to_string(uniform.index) + '[' + std::to_string(uniform.offset) + ']';
373 370
374 if (type == GLSLRegister::Type::Float) { 371 if (type == GLSLRegister::Type::Float) {
375 return value; 372 return value;
@@ -380,10 +377,19 @@ public:
380 } 377 }
381 } 378 }
382 379
383 /// Generates code representing a uniform (C buffer) register, interpreted as the type of the 380 std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg,
384 /// destination register. 381 GLSLRegister::Type type) {
385 std::string GetUniform(const Uniform& uniform, const Register& dest_reg) { 382 declr_const_buffers[index].MarkAsUsedIndirect(index, stage);
386 return GetUniform(uniform, regs[dest_reg].GetActiveType()); 383 std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" +
384 GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]";
385
386 if (type == GLSLRegister::Type::Float) {
387 return value;
388 } else if (type == GLSLRegister::Type::Integer) {
389 return "floatBitsToInt(" + value + ')';
390 } else {
391 UNREACHABLE();
392 }
387 } 393 }
388 394
389 /// Add declarations for registers 395 /// Add declarations for registers
@@ -747,7 +753,8 @@ private:
747 if (instr.is_b_gpr) { 753 if (instr.is_b_gpr) {
748 op_b += regs.GetRegisterAsFloat(instr.gpr20); 754 op_b += regs.GetRegisterAsFloat(instr.gpr20);
749 } else { 755 } else {
750 op_b += regs.GetUniform(instr.uniform, instr.gpr0); 756 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
757 GLSLRegister::Type::Float);
751 } 758 }
752 } 759 }
753 760
@@ -904,7 +911,8 @@ private:
904 if (instr.is_b_gpr) { 911 if (instr.is_b_gpr) {
905 op_b += regs.GetRegisterAsInteger(instr.gpr20); 912 op_b += regs.GetRegisterAsInteger(instr.gpr20);
906 } else { 913 } else {
907 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); 914 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
915 GLSLRegister::Type::Integer);
908 } 916 }
909 } 917 }
910 918
@@ -936,7 +944,8 @@ private:
936 if (instr.is_b_gpr) { 944 if (instr.is_b_gpr) {
937 op_b += regs.GetRegisterAsInteger(instr.gpr20); 945 op_b += regs.GetRegisterAsInteger(instr.gpr20);
938 } else { 946 } else {
939 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); 947 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
948 GLSLRegister::Type::Integer);
940 } 949 }
941 } 950 }
942 951
@@ -953,7 +962,8 @@ private:
953 962
954 switch (opcode->GetId()) { 963 switch (opcode->GetId()) {
955 case OpCode::Id::FFMA_CR: { 964 case OpCode::Id::FFMA_CR: {
956 op_b += regs.GetUniform(instr.uniform, instr.gpr0); 965 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
966 GLSLRegister::Type::Float);
957 op_c += regs.GetRegisterAsFloat(instr.gpr39); 967 op_c += regs.GetRegisterAsFloat(instr.gpr39);
958 break; 968 break;
959 } 969 }
@@ -964,7 +974,8 @@ private:
964 } 974 }
965 case OpCode::Id::FFMA_RC: { 975 case OpCode::Id::FFMA_RC: {
966 op_b += regs.GetRegisterAsFloat(instr.gpr39); 976 op_b += regs.GetRegisterAsFloat(instr.gpr39);
967 op_c += regs.GetUniform(instr.uniform, instr.gpr0); 977 op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
978 GLSLRegister::Type::Float);
968 break; 979 break;
969 } 980 }
970 case OpCode::Id::FFMA_IMM: { 981 case OpCode::Id::FFMA_IMM: {
@@ -1079,6 +1090,33 @@ private:
1079 attribute); 1090 attribute);
1080 break; 1091 break;
1081 } 1092 }
1093 case OpCode::Id::LD_C: {
1094 ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented");
1095
1096 std::string op_a =
1097 regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8,
1098 GLSLRegister::Type::Float);
1099 std::string op_b =
1100 regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8,
1101 GLSLRegister::Type::Float);
1102
1103 switch (instr.ld_c.type.Value()) {
1104 case Tegra::Shader::UniformType::Single:
1105 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
1106 break;
1107
1108 case Tegra::Shader::UniformType::Double:
1109 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
1110 regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1);
1111 break;
1112
1113 default:
1114 NGLOG_CRITICAL(HW_GPU, "Unhandled type: {}",
1115 static_cast<unsigned>(instr.ld_c.type.Value()));
1116 UNREACHABLE();
1117 }
1118 break;
1119 }
1082 case OpCode::Id::ST_A: { 1120 case OpCode::Id::ST_A: {
1083 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); 1121 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
1084 regs.SetOutputAttributeToRegister(attribute, instr.attribute.fmt20.element, 1122 regs.SetOutputAttributeToRegister(attribute, instr.attribute.fmt20.element,
@@ -1175,7 +1213,8 @@ private:
1175 if (instr.is_b_gpr) { 1213 if (instr.is_b_gpr) {
1176 op_b += regs.GetRegisterAsFloat(instr.gpr20); 1214 op_b += regs.GetRegisterAsFloat(instr.gpr20);
1177 } else { 1215 } else {
1178 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float); 1216 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1217 GLSLRegister::Type::Float);
1179 } 1218 }
1180 } 1219 }
1181 1220
@@ -1216,7 +1255,8 @@ private:
1216 if (instr.is_b_gpr) { 1255 if (instr.is_b_gpr) {
1217 op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed); 1256 op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
1218 } else { 1257 } else {
1219 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); 1258 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1259 GLSLRegister::Type::Integer);
1220 } 1260 }
1221 1261
1222 using Tegra::Shader::Pred; 1262 using Tegra::Shader::Pred;
@@ -1262,7 +1302,8 @@ private:
1262 if (instr.is_b_gpr) { 1302 if (instr.is_b_gpr) {
1263 op_b += regs.GetRegisterAsFloat(instr.gpr20); 1303 op_b += regs.GetRegisterAsFloat(instr.gpr20);
1264 } else { 1304 } else {
1265 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float); 1305 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1306 GLSLRegister::Type::Float);
1266 } 1307 }
1267 } 1308 }
1268 1309
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 458032b5c..ad795610c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -22,17 +22,28 @@ class ConstBufferEntry {
22 using Maxwell = Tegra::Engines::Maxwell3D::Regs; 22 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23 23
24public: 24public:
25 void MarkAsUsed(unsigned index, unsigned offset, Maxwell::ShaderStage stage) { 25 void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) {
26 is_used = true; 26 is_used = true;
27 this->index = index; 27 this->index = static_cast<unsigned>(index);
28 this->stage = stage;
29 max_offset = std::max(max_offset, static_cast<unsigned>(offset));
30 }
31
32 void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) {
33 is_used = true;
34 is_indirect = true;
35 this->index = static_cast<unsigned>(index);
28 this->stage = stage; 36 this->stage = stage;
29 max_offset = std::max(max_offset, offset);
30 } 37 }
31 38
32 bool IsUsed() const { 39 bool IsUsed() const {
33 return is_used; 40 return is_used;
34 } 41 }
35 42
43 bool IsIndirect() const {
44 return is_indirect;
45 }
46
36 unsigned GetIndex() const { 47 unsigned GetIndex() const {
37 return index; 48 return index;
38 } 49 }
@@ -51,6 +62,7 @@ private:
51 }; 62 };
52 63
53 bool is_used{}; 64 bool is_used{};
65 bool is_indirect{};
54 unsigned index{}; 66 unsigned index{};
55 unsigned max_offset{}; 67 unsigned max_offset{};
56 Maxwell::ShaderStage stage; 68 Maxwell::ShaderStage stage;