diff options
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 32 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 11 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 77 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 18 |
4 files changed, 110 insertions, 28 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 4eb507325..af18c2d81 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -109,11 +109,6 @@ union Sampler { | |||
| 109 | u64 value{}; | 109 | u64 value{}; |
| 110 | }; | 110 | }; |
| 111 | 111 | ||
| 112 | union Uniform { | ||
| 113 | BitField<20, 14, u64> offset; | ||
| 114 | BitField<34, 5, u64> index; | ||
| 115 | }; | ||
| 116 | |||
| 117 | } // namespace Shader | 112 | } // namespace Shader |
| 118 | } // namespace Tegra | 113 | } // namespace Tegra |
| 119 | 114 | ||
| @@ -180,6 +175,15 @@ enum class FloatRoundingOp : u64 { | |||
| 180 | Trunc = 3, | 175 | Trunc = 3, |
| 181 | }; | 176 | }; |
| 182 | 177 | ||
| 178 | enum class UniformType : u64 { | ||
| 179 | UnsignedByte = 0, | ||
| 180 | SignedByte = 1, | ||
| 181 | UnsignedShort = 2, | ||
| 182 | SignedShort = 3, | ||
| 183 | Single = 4, | ||
| 184 | Double = 5, | ||
| 185 | }; | ||
| 186 | |||
| 183 | union Instruction { | 187 | union Instruction { |
| 184 | Instruction& operator=(const Instruction& instr) { | 188 | Instruction& operator=(const Instruction& instr) { |
| 185 | value = instr.value; | 189 | value = instr.value; |
| @@ -258,6 +262,11 @@ union Instruction { | |||
| 258 | } ffma; | 262 | } ffma; |
| 259 | 263 | ||
| 260 | union { | 264 | union { |
| 265 | BitField<48, 3, UniformType> type; | ||
| 266 | BitField<44, 2, u64> unknown; | ||
| 267 | } ld_c; | ||
| 268 | |||
| 269 | union { | ||
| 261 | BitField<0, 3, u64> pred0; | 270 | BitField<0, 3, u64> pred0; |
| 262 | BitField<3, 3, u64> pred3; | 271 | BitField<3, 3, u64> pred3; |
| 263 | BitField<7, 1, u64> abs_a; | 272 | BitField<7, 1, u64> abs_a; |
| @@ -354,12 +363,21 @@ union Instruction { | |||
| 354 | } | 363 | } |
| 355 | } bra; | 364 | } bra; |
| 356 | 365 | ||
| 366 | union { | ||
| 367 | BitField<20, 14, u64> offset; | ||
| 368 | BitField<34, 5, u64> index; | ||
| 369 | } cbuf34; | ||
| 370 | |||
| 371 | union { | ||
| 372 | BitField<20, 16, s64> offset; | ||
| 373 | BitField<36, 5, u64> index; | ||
| 374 | } cbuf36; | ||
| 375 | |||
| 357 | BitField<61, 1, u64> is_b_imm; | 376 | BitField<61, 1, u64> is_b_imm; |
| 358 | BitField<60, 1, u64> is_b_gpr; | 377 | BitField<60, 1, u64> is_b_gpr; |
| 359 | BitField<59, 1, u64> is_c_gpr; | 378 | BitField<59, 1, u64> is_c_gpr; |
| 360 | 379 | ||
| 361 | Attribute attribute; | 380 | Attribute attribute; |
| 362 | Uniform uniform; | ||
| 363 | Sampler sampler; | 381 | Sampler sampler; |
| 364 | 382 | ||
| 365 | u64 value; | 383 | u64 value; |
| @@ -374,6 +392,7 @@ public: | |||
| 374 | KIL, | 392 | KIL, |
| 375 | BRA, | 393 | BRA, |
| 376 | LD_A, | 394 | LD_A, |
| 395 | LD_C, | ||
| 377 | ST_A, | 396 | ST_A, |
| 378 | TEX, | 397 | TEX, |
| 379 | TEXQ, // Texture Query | 398 | TEXQ, // Texture Query |
| @@ -548,6 +567,7 @@ private: | |||
| 548 | INST("111000110011----", Id::KIL, Type::Flow, "KIL"), | 567 | INST("111000110011----", Id::KIL, Type::Flow, "KIL"), |
| 549 | INST("111000100100----", Id::BRA, Type::Flow, "BRA"), | 568 | INST("111000100100----", Id::BRA, Type::Flow, "BRA"), |
| 550 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), | 569 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), |
| 570 | INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), | ||
| 551 | INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), | 571 | INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), |
| 552 | INST("1100000000111---", Id::TEX, Type::Memory, "TEX"), | 572 | INST("1100000000111---", Id::TEX, Type::Memory, "TEX"), |
| 553 | INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), | 573 | INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e9eb03ad9..b23b8fb29 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -625,7 +625,16 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr | |||
| 625 | buffer_draw_state.bindpoint = current_bindpoint + bindpoint; | 625 | buffer_draw_state.bindpoint = current_bindpoint + bindpoint; |
| 626 | 626 | ||
| 627 | boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); | 627 | boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); |
| 628 | std::vector<u8> data(used_buffer.GetSize() * sizeof(float)); | 628 | |
| 629 | std::vector<u8> data; | ||
| 630 | if (used_buffer.IsIndirect()) { | ||
| 631 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 632 | data.resize(buffer.size * sizeof(float)); | ||
| 633 | } else { | ||
| 634 | // Buffer is accessed directly, upload just what we use | ||
| 635 | data.resize(used_buffer.GetSize() * sizeof(float)); | ||
| 636 | } | ||
| 637 | |||
| 629 | Memory::ReadBlock(*addr, data.data(), data.size()); | 638 | Memory::ReadBlock(*addr, data.data(), data.size()); |
| 630 | 639 | ||
| 631 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); | 640 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 15288bd57..3067ce3b3 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -20,7 +20,6 @@ using Tegra::Shader::OpCode; | |||
| 20 | using Tegra::Shader::Register; | 20 | using Tegra::Shader::Register; |
| 21 | using Tegra::Shader::Sampler; | 21 | using Tegra::Shader::Sampler; |
| 22 | using Tegra::Shader::SubOp; | 22 | using Tegra::Shader::SubOp; |
| 23 | using Tegra::Shader::Uniform; | ||
| 24 | 23 | ||
| 25 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; | 24 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; |
| 26 | 25 | ||
| @@ -365,11 +364,9 @@ public: | |||
| 365 | } | 364 | } |
| 366 | 365 | ||
| 367 | /// Generates code representing a uniform (C buffer) register, interpreted as the input type. | 366 | /// Generates code representing a uniform (C buffer) register, interpreted as the input type. |
| 368 | std::string GetUniform(const Uniform& uniform, GLSLRegister::Type type) { | 367 | std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) { |
| 369 | declr_const_buffers[uniform.index].MarkAsUsed(static_cast<unsigned>(uniform.index), | 368 | declr_const_buffers[index].MarkAsUsed(index, offset, stage); |
| 370 | static_cast<unsigned>(uniform.offset), stage); | 369 | std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']'; |
| 371 | std::string value = | ||
| 372 | 'c' + std::to_string(uniform.index) + '[' + std::to_string(uniform.offset) + ']'; | ||
| 373 | 370 | ||
| 374 | if (type == GLSLRegister::Type::Float) { | 371 | if (type == GLSLRegister::Type::Float) { |
| 375 | return value; | 372 | return value; |
| @@ -380,10 +377,19 @@ public: | |||
| 380 | } | 377 | } |
| 381 | } | 378 | } |
| 382 | 379 | ||
| 383 | /// Generates code representing a uniform (C buffer) register, interpreted as the type of the | 380 | std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg, |
| 384 | /// destination register. | 381 | GLSLRegister::Type type) { |
| 385 | std::string GetUniform(const Uniform& uniform, const Register& dest_reg) { | 382 | declr_const_buffers[index].MarkAsUsedIndirect(index, stage); |
| 386 | return GetUniform(uniform, regs[dest_reg].GetActiveType()); | 383 | std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" + |
| 384 | GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]"; | ||
| 385 | |||
| 386 | if (type == GLSLRegister::Type::Float) { | ||
| 387 | return value; | ||
| 388 | } else if (type == GLSLRegister::Type::Integer) { | ||
| 389 | return "floatBitsToInt(" + value + ')'; | ||
| 390 | } else { | ||
| 391 | UNREACHABLE(); | ||
| 392 | } | ||
| 387 | } | 393 | } |
| 388 | 394 | ||
| 389 | /// Add declarations for registers | 395 | /// Add declarations for registers |
| @@ -778,7 +784,8 @@ private: | |||
| 778 | if (instr.is_b_gpr) { | 784 | if (instr.is_b_gpr) { |
| 779 | op_b += regs.GetRegisterAsFloat(instr.gpr20); | 785 | op_b += regs.GetRegisterAsFloat(instr.gpr20); |
| 780 | } else { | 786 | } else { |
| 781 | op_b += regs.GetUniform(instr.uniform, instr.gpr0); | 787 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, |
| 788 | GLSLRegister::Type::Float); | ||
| 782 | } | 789 | } |
| 783 | } | 790 | } |
| 784 | 791 | ||
| @@ -935,7 +942,8 @@ private: | |||
| 935 | if (instr.is_b_gpr) { | 942 | if (instr.is_b_gpr) { |
| 936 | op_b += regs.GetRegisterAsInteger(instr.gpr20); | 943 | op_b += regs.GetRegisterAsInteger(instr.gpr20); |
| 937 | } else { | 944 | } else { |
| 938 | op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); | 945 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, |
| 946 | GLSLRegister::Type::Integer); | ||
| 939 | } | 947 | } |
| 940 | } | 948 | } |
| 941 | 949 | ||
| @@ -967,7 +975,8 @@ private: | |||
| 967 | if (instr.is_b_gpr) { | 975 | if (instr.is_b_gpr) { |
| 968 | op_b += regs.GetRegisterAsInteger(instr.gpr20); | 976 | op_b += regs.GetRegisterAsInteger(instr.gpr20); |
| 969 | } else { | 977 | } else { |
| 970 | op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); | 978 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, |
| 979 | GLSLRegister::Type::Integer); | ||
| 971 | } | 980 | } |
| 972 | } | 981 | } |
| 973 | 982 | ||
| @@ -984,7 +993,8 @@ private: | |||
| 984 | 993 | ||
| 985 | switch (opcode->GetId()) { | 994 | switch (opcode->GetId()) { |
| 986 | case OpCode::Id::FFMA_CR: { | 995 | case OpCode::Id::FFMA_CR: { |
| 987 | op_b += regs.GetUniform(instr.uniform, instr.gpr0); | 996 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, |
| 997 | GLSLRegister::Type::Float); | ||
| 988 | op_c += regs.GetRegisterAsFloat(instr.gpr39); | 998 | op_c += regs.GetRegisterAsFloat(instr.gpr39); |
| 989 | break; | 999 | break; |
| 990 | } | 1000 | } |
| @@ -995,7 +1005,8 @@ private: | |||
| 995 | } | 1005 | } |
| 996 | case OpCode::Id::FFMA_RC: { | 1006 | case OpCode::Id::FFMA_RC: { |
| 997 | op_b += regs.GetRegisterAsFloat(instr.gpr39); | 1007 | op_b += regs.GetRegisterAsFloat(instr.gpr39); |
| 998 | op_c += regs.GetUniform(instr.uniform, instr.gpr0); | 1008 | op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, |
| 1009 | GLSLRegister::Type::Float); | ||
| 999 | break; | 1010 | break; |
| 1000 | } | 1011 | } |
| 1001 | case OpCode::Id::FFMA_IMM: { | 1012 | case OpCode::Id::FFMA_IMM: { |
| @@ -1110,6 +1121,33 @@ private: | |||
| 1110 | attribute); | 1121 | attribute); |
| 1111 | break; | 1122 | break; |
| 1112 | } | 1123 | } |
| 1124 | case OpCode::Id::LD_C: { | ||
| 1125 | ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented"); | ||
| 1126 | |||
| 1127 | std::string op_a = | ||
| 1128 | regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8, | ||
| 1129 | GLSLRegister::Type::Float); | ||
| 1130 | std::string op_b = | ||
| 1131 | regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8, | ||
| 1132 | GLSLRegister::Type::Float); | ||
| 1133 | |||
| 1134 | switch (instr.ld_c.type.Value()) { | ||
| 1135 | case Tegra::Shader::UniformType::Single: | ||
| 1136 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | ||
| 1137 | break; | ||
| 1138 | |||
| 1139 | case Tegra::Shader::UniformType::Double: | ||
| 1140 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | ||
| 1141 | regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1); | ||
| 1142 | break; | ||
| 1143 | |||
| 1144 | default: | ||
| 1145 | NGLOG_CRITICAL(HW_GPU, "Unhandled type: {}", | ||
| 1146 | static_cast<unsigned>(instr.ld_c.type.Value())); | ||
| 1147 | UNREACHABLE(); | ||
| 1148 | } | ||
| 1149 | break; | ||
| 1150 | } | ||
| 1113 | case OpCode::Id::ST_A: { | 1151 | case OpCode::Id::ST_A: { |
| 1114 | ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); | 1152 | ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); |
| 1115 | regs.SetOutputAttributeToRegister(attribute, instr.attribute.fmt20.element, | 1153 | regs.SetOutputAttributeToRegister(attribute, instr.attribute.fmt20.element, |
| @@ -1206,7 +1244,8 @@ private: | |||
| 1206 | if (instr.is_b_gpr) { | 1244 | if (instr.is_b_gpr) { |
| 1207 | op_b += regs.GetRegisterAsFloat(instr.gpr20); | 1245 | op_b += regs.GetRegisterAsFloat(instr.gpr20); |
| 1208 | } else { | 1246 | } else { |
| 1209 | op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float); | 1247 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, |
| 1248 | GLSLRegister::Type::Float); | ||
| 1210 | } | 1249 | } |
| 1211 | } | 1250 | } |
| 1212 | 1251 | ||
| @@ -1247,7 +1286,8 @@ private: | |||
| 1247 | if (instr.is_b_gpr) { | 1286 | if (instr.is_b_gpr) { |
| 1248 | op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed); | 1287 | op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed); |
| 1249 | } else { | 1288 | } else { |
| 1250 | op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); | 1289 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, |
| 1290 | GLSLRegister::Type::Integer); | ||
| 1251 | } | 1291 | } |
| 1252 | 1292 | ||
| 1253 | using Tegra::Shader::Pred; | 1293 | using Tegra::Shader::Pred; |
| @@ -1293,7 +1333,8 @@ private: | |||
| 1293 | if (instr.is_b_gpr) { | 1333 | if (instr.is_b_gpr) { |
| 1294 | op_b += regs.GetRegisterAsFloat(instr.gpr20); | 1334 | op_b += regs.GetRegisterAsFloat(instr.gpr20); |
| 1295 | } else { | 1335 | } else { |
| 1296 | op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float); | 1336 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, |
| 1337 | GLSLRegister::Type::Float); | ||
| 1297 | } | 1338 | } |
| 1298 | } | 1339 | } |
| 1299 | 1340 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index e8b78934c..ed890e0f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -22,17 +22,28 @@ class ConstBufferEntry { | |||
| 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 23 | 23 | ||
| 24 | public: | 24 | public: |
| 25 | void MarkAsUsed(unsigned index, unsigned offset, Maxwell::ShaderStage stage) { | 25 | void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) { |
| 26 | is_used = true; | 26 | is_used = true; |
| 27 | this->index = index; | 27 | this->index = static_cast<unsigned>(index); |
| 28 | this->stage = stage; | ||
| 29 | max_offset = std::max(max_offset, static_cast<unsigned>(offset)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) { | ||
| 33 | is_used = true; | ||
| 34 | is_indirect = true; | ||
| 35 | this->index = static_cast<unsigned>(index); | ||
| 28 | this->stage = stage; | 36 | this->stage = stage; |
| 29 | max_offset = std::max(max_offset, offset); | ||
| 30 | } | 37 | } |
| 31 | 38 | ||
| 32 | bool IsUsed() const { | 39 | bool IsUsed() const { |
| 33 | return is_used; | 40 | return is_used; |
| 34 | } | 41 | } |
| 35 | 42 | ||
| 43 | bool IsIndirect() const { | ||
| 44 | return is_indirect; | ||
| 45 | } | ||
| 46 | |||
| 36 | unsigned GetIndex() const { | 47 | unsigned GetIndex() const { |
| 37 | return index; | 48 | return index; |
| 38 | } | 49 | } |
| @@ -51,6 +62,7 @@ private: | |||
| 51 | }; | 62 | }; |
| 52 | 63 | ||
| 53 | bool is_used{}; | 64 | bool is_used{}; |
| 65 | bool is_indirect{}; | ||
| 54 | unsigned index{}; | 66 | unsigned index{}; |
| 55 | unsigned max_offset{}; | 67 | unsigned max_offset{}; |
| 56 | Maxwell::ShaderStage stage; | 68 | Maxwell::ShaderStage stage; |