diff options
| author | 2018-09-04 23:09:24 -0400 | |
|---|---|---|
| committer | 2018-09-05 20:15:47 -0400 | |
| commit | e63b229f4a0704b37e0a6b57dde76deb2f151c81 (patch) | |
| tree | 500d239fea733b32022a123c1b5be9ef68ed0b2c | |
| parent | Merge pull request #1240 from degasus/optimizations (diff) | |
| download | yuzu-e63b229f4a0704b37e0a6b57dde76deb2f151c81.tar.gz yuzu-e63b229f4a0704b37e0a6b57dde76deb2f151c81.tar.xz yuzu-e63b229f4a0704b37e0a6b57dde76deb2f151c81.zip | |
Implemented IPA Properly
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 12 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 133 |
2 files changed, 98 insertions, 47 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index a7daea766..d2388673e 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -76,6 +76,7 @@ union Attribute { | |||
| 76 | Position = 7, | 76 | Position = 7, |
| 77 | Attribute_0 = 8, | 77 | Attribute_0 = 8, |
| 78 | Attribute_31 = 39, | 78 | Attribute_31 = 39, |
| 79 | PointCoord = 46, | ||
| 79 | // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex | 80 | // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex |
| 80 | // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval | 81 | // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval |
| 81 | // shader. | 82 | // shader. |
| @@ -246,6 +247,17 @@ enum class TextureType : u64 { | |||
| 246 | enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 }; | 247 | enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 }; |
| 247 | enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 }; | 248 | enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 }; |
| 248 | 249 | ||
| 250 | /// Interpolation/sampling mode pair taken from an IPA instruction; compared by value. | ||
| 251 | struct IpaMode { | ||
| 252 | IpaInterpMode interpolation_mode; | ||
| 253 | IpaSampleMode sampling_mode; | ||
| 254 | // const-qualified so that const IpaMode objects and references can be compared as well. | ||
| 255 | bool operator==(const IpaMode& a) const { | ||
| 256 | return (a.interpolation_mode == interpolation_mode) && (a.sampling_mode == sampling_mode); | ||
| 257 | } | ||
| 258 | bool operator!=(const IpaMode& a) const { return !(*this == a); } | ||
| 259 | }; | ||
| 260 | |||
| 249 | union Instruction { | 261 | union Instruction { |
| 250 | Instruction& operator=(const Instruction& instr) { | 262 | Instruction& operator=(const Instruction& instr) { |
| 251 | value = instr.value; | 263 | value = instr.value; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d3e8f5078..781ddb073 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -247,6 +247,7 @@ public: | |||
| 247 | const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix) | 247 | const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix) |
| 248 | : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} { | 248 | : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} { |
| 249 | BuildRegisterList(); | 249 | BuildRegisterList(); |
| 250 | BuildInputList(); | ||
| 250 | } | 251 | } |
| 251 | 252 | ||
| 252 | /** | 253 | /** |
| @@ -343,9 +344,10 @@ public: | |||
| 343 | * @param elem The element to use for the operation. | 344 | * @param elem The element to use for the operation. |
| 344 | * @param attribute The input attribute to use as the source value. | 345 | * @param attribute The input attribute to use as the source value. |
| 345 | */ | 346 | */ |
| 346 | void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute) { | 347 | void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute, |
| 348 | const Tegra::Shader::IpaMode& input_mode) { | ||
| 347 | std::string dest = GetRegisterAsFloat(reg); | 349 | std::string dest = GetRegisterAsFloat(reg); |
| 348 | std::string src = GetInputAttribute(attribute) + GetSwizzle(elem); | 350 | std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem); |
| 349 | shader.AddLine(dest + " = " + src + ';'); | 351 | shader.AddLine(dest + " = " + src + ';'); |
| 350 | } | 352 | } |
| 351 | 353 | ||
| @@ -412,12 +414,13 @@ public: | |||
| 412 | } | 414 | } |
| 413 | declarations.AddNewLine(); | 415 | declarations.AddNewLine(); |
| 414 | 416 | ||
| 415 | for (const auto& index : declr_input_attribute) { | 417 | for (const auto element : declr_input_attribute) { |
| 416 | // TODO(bunnei): Use proper number of elements for these | 418 | // TODO(bunnei): Use proper number of elements for these |
| 417 | declarations.AddLine("layout(location = " + | 419 | u32 idx = |
| 418 | std::to_string(static_cast<u32>(index) - | 420 | static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0); |
| 419 | static_cast<u32>(Attribute::Index::Attribute_0)) + | 421 | declarations.AddLine("layout(location = " + std::to_string(idx) + ")" + |
| 420 | ") in vec4 " + GetInputAttribute(index) + ';'); | 422 | GetInputFlags(element.first) + "in vec4 " + |
| 423 | GetInputAttribute(element.first, element.second) + ';'); | ||
| 421 | } | 424 | } |
| 422 | declarations.AddNewLine(); | 425 | declarations.AddNewLine(); |
| 423 | 426 | ||
| @@ -532,11 +535,24 @@ private: | |||
| 532 | } | 535 | } |
| 533 | } | 536 | } |
| 534 | 537 | ||
| 538 | void BuildInputList() { // Pre-sizes the map for Attribute_0..Attribute_31 inclusive. | ||
| 539 | const auto first_generic = static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 540 | const auto last_generic = static_cast<u32>(Attribute::Index::Attribute_31); | ||
| 541 | declr_input_attribute.reserve(last_generic - first_generic + 1); | ||
| 542 | } | ||
| 543 | |||
| 535 | /// Generates code representing an input attribute register. | 544 | /// Generates code representing an input attribute register. |
| 536 | std::string GetInputAttribute(Attribute::Index attribute) { | 545 | std::string GetInputAttribute(Attribute::Index attribute, |
| 546 | const Tegra::Shader::IpaMode& input_mode) { | ||
| 537 | switch (attribute) { | 547 | switch (attribute) { |
| 538 | case Attribute::Index::Position: | 548 | case Attribute::Index::Position: |
| 539 | return "position"; | 549 | if (stage != Maxwell3D::Regs::ShaderStage::Fragment) { |
| 550 | return "position"; | ||
| 551 | } else { | ||
| 552 | return "vec4(gl_FragCoord.x, gl_FragCoord.y, gl_FragCoord.z, 1.0)"; | ||
| 553 | } | ||
| 554 | case Attribute::Index::PointCoord: | ||
| 555 | return "vec4(gl_PointCoord.x, gl_PointCoord.y, 0, 0)"; | ||
| 540 | case Attribute::Index::TessCoordInstanceIDVertexID: | 556 | case Attribute::Index::TessCoordInstanceIDVertexID: |
| 541 | // TODO(Subv): Find out what the values are for the first two elements when inside a | 557 | // TODO(Subv): Find out what the values are for the first two elements when inside a |
| 542 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 558 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| @@ -552,7 +568,14 @@ private: | |||
| 552 | static_cast<u32>(Attribute::Index::Attribute_0)}; | 568 | static_cast<u32>(Attribute::Index::Attribute_0)}; |
| 553 | if (attribute >= Attribute::Index::Attribute_0 && | 569 | if (attribute >= Attribute::Index::Attribute_0 && |
| 554 | attribute <= Attribute::Index::Attribute_31) { | 570 | attribute <= Attribute::Index::Attribute_31) { |
| 555 | declr_input_attribute.insert(attribute); | 571 | if (declr_input_attribute.count(attribute) == 0) { |
| 572 | declr_input_attribute[attribute] = input_mode; | ||
| 573 | } else { | ||
| 574 | if (declr_input_attribute[attribute] != input_mode) { | ||
| 575 | LOG_CRITICAL(HW_GPU, "Same Input multiple input modes"); | ||
| 576 | UNREACHABLE(); | ||
| 577 | } | ||
| 578 | } | ||
| 556 | return "input_attribute_" + std::to_string(index); | 579 | return "input_attribute_" + std::to_string(index); |
| 557 | } | 580 | } |
| 558 | 581 | ||
| @@ -563,6 +586,49 @@ private: | |||
| 563 | return "vec4(0, 0, 0, 0)"; | 586 | return "vec4(0, 0, 0, 0)"; |
| 564 | } | 587 | } |
| 565 | 588 | ||
| 589 | /// Builds the GLSL interpolation qualifiers for the declaration of an input attribute. | ||
| 590 | /// The returned text is meant to sit directly in front of the "in" storage qualifier. | ||
| 591 | /// @param attribute Input attribute whose recorded IPA mode is translated. | ||
| 592 | /// @returns "flat ", "noperspective " or an empty string (trailing space included). | ||
| 593 | std::string GetInputFlags(const Attribute::Index attribute) { | ||
| 594 | // operator[] inserts a value-initialized entry if the attribute was never recorded. | ||
| 595 | const Tegra::Shader::IpaMode& mode = declr_input_attribute[attribute]; | ||
| 596 | std::string qualifiers; | ||
| 597 | |||
| 598 | // Perspective is the GLSL default (smooth), so it contributes no keyword. | ||
| 599 | switch (mode.interpolation_mode) { | ||
| 600 | case Tegra::Shader::IpaInterpMode::Perspective: | ||
| 601 | break; | ||
| 602 | case Tegra::Shader::IpaInterpMode::Linear: | ||
| 603 | qualifiers += "noperspective "; | ||
| 604 | break; | ||
| 605 | case Tegra::Shader::IpaInterpMode::Flat: | ||
| 606 | qualifiers += "flat "; | ||
| 607 | break; | ||
| 608 | default: | ||
| 609 | LOG_CRITICAL(HW_GPU, "Unhandled Ipa InterpMode: {}", | ||
| 610 | static_cast<u32>(mode.interpolation_mode)); | ||
| 611 | UNREACHABLE(); | ||
| 612 | } | ||
| 613 | |||
| 614 | // Only the default sampling mode is supported, and it contributes no keyword. | ||
| 615 | switch (mode.sampling_mode) { | ||
| 616 | case Tegra::Shader::IpaSampleMode::Default: | ||
| 617 | break; | ||
| 618 | case Tegra::Shader::IpaSampleMode::Centroid: | ||
| 619 | // Not implemented; GLSL's "centroid " keyword could express it. | ||
| 620 | LOG_CRITICAL(HW_GPU, "Ipa Sampler Mode: centroid, not implemented"); | ||
| 621 | UNREACHABLE(); | ||
| 622 | break; | ||
| 623 | default: | ||
| 624 | LOG_CRITICAL(HW_GPU, "Unhandled Ipa SampleMode: {}", | ||
| 625 | static_cast<u32>(mode.sampling_mode)); | ||
| 626 | UNREACHABLE(); | ||
| 627 | } | ||
| 628 | |||
| 629 | return qualifiers; | ||
| 630 | } | ||
| 631 | |||
| 566 | /// Generates code representing an output attribute register. | 632 | /// Generates code representing an output attribute register. |
| 567 | std::string GetOutputAttribute(Attribute::Index attribute) { | 633 | std::string GetOutputAttribute(Attribute::Index attribute) { |
| 568 | switch (attribute) { | 634 | switch (attribute) { |
| @@ -593,7 +659,7 @@ private: | |||
| 593 | ShaderWriter& shader; | 659 | ShaderWriter& shader; |
| 594 | ShaderWriter& declarations; | 660 | ShaderWriter& declarations; |
| 595 | std::vector<GLSLRegister> regs; | 661 | std::vector<GLSLRegister> regs; |
| 596 | std::set<Attribute::Index> declr_input_attribute; | 662 | std::unordered_map<Attribute::Index, Tegra::Shader::IpaMode> declr_input_attribute; |
| 597 | std::set<Attribute::Index> declr_output_attribute; | 663 | std::set<Attribute::Index> declr_output_attribute; |
| 598 | std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; | 664 | std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; |
| 599 | std::vector<SamplerEntry> used_samplers; | 665 | std::vector<SamplerEntry> used_samplers; |
| @@ -1634,8 +1700,12 @@ private: | |||
| 1634 | switch (opcode->GetId()) { | 1700 | switch (opcode->GetId()) { |
| 1635 | case OpCode::Id::LD_A: { | 1701 | case OpCode::Id::LD_A: { |
| 1636 | ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); | 1702 | ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); |
| 1703 | // Note: Shouldn't this be interp mode flat? As in no interpolation made. | ||
| 1704 | |||
| 1705 | Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, | ||
| 1706 | Tegra::Shader::IpaSampleMode::Default}; | ||
| 1637 | regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element, | 1707 | regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element, |
| 1638 | instr.attribute.fmt20.index); | 1708 | instr.attribute.fmt20.index, input_mode); |
| 1639 | break; | 1709 | break; |
| 1640 | } | 1710 | } |
| 1641 | case OpCode::Id::LD_C: { | 1711 | case OpCode::Id::LD_C: { |
| @@ -2127,42 +2197,11 @@ private: | |||
| 2127 | case OpCode::Id::IPA: { | 2197 | case OpCode::Id::IPA: { |
| 2128 | const auto& attribute = instr.attribute.fmt28; | 2198 | const auto& attribute = instr.attribute.fmt28; |
| 2129 | const auto& reg = instr.gpr0; | 2199 | const auto& reg = instr.gpr0; |
| 2130 | ASSERT_MSG(instr.ipa.sample_mode == Tegra::Shader::IpaSampleMode::Default, | ||
| 2131 | "Unhandled IPA sample mode: {}", | ||
| 2132 | static_cast<u32>(instr.ipa.sample_mode.Value())); | ||
| 2133 | ASSERT_MSG(instr.ipa.saturate == 0, "IPA saturate not implemented"); | 2200 | ASSERT_MSG(instr.ipa.saturate == 0, "IPA saturate not implemented"); |
| 2134 | switch (instr.ipa.interp_mode) { | 2201 | Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), |
| 2135 | case Tegra::Shader::IpaInterpMode::Linear: | 2202 | instr.ipa.sample_mode.Value()}; |
| 2136 | if (stage == Maxwell3D::Regs::ShaderStage::Fragment && | 2203 | regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index, |
| 2137 | attribute.index == Attribute::Index::Position) { | 2204 | input_mode); |
| 2138 | switch (attribute.element) { | ||
| 2139 | case 0: | ||
| 2140 | shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.x;"); | ||
| 2141 | break; | ||
| 2142 | case 1: | ||
| 2143 | shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.y;"); | ||
| 2144 | break; | ||
| 2145 | case 2: | ||
| 2146 | shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.z;"); | ||
| 2147 | break; | ||
| 2148 | case 3: | ||
| 2149 | shader.AddLine(regs.GetRegisterAsFloat(reg) + " = 1.0;"); | ||
| 2150 | break; | ||
| 2151 | } | ||
| 2152 | } else { | ||
| 2153 | regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index); | ||
| 2154 | } | ||
| 2155 | break; | ||
| 2156 | case Tegra::Shader::IpaInterpMode::Perspective: | ||
| 2157 | regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index); | ||
| 2158 | break; | ||
| 2159 | default: | ||
| 2160 | LOG_CRITICAL(HW_GPU, "Unhandled IPA mode: {}", | ||
| 2161 | static_cast<u32>(instr.ipa.interp_mode.Value())); | ||
| 2162 | UNREACHABLE(); | ||
| 2163 | regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index); | ||
| 2164 | } | ||
| 2165 | |||
| 2166 | break; | 2205 | break; |
| 2167 | } | 2206 | } |
| 2168 | case OpCode::Id::SSY: { | 2207 | case OpCode::Id::SSY: { |