diff options
| author | 2019-02-24 23:04:22 -0500 | |
|---|---|---|
| committer | 2019-02-24 23:04:22 -0500 | |
| commit | c07987dfab78e32a14ea3cb883f71323c99ad034 (patch) | |
| tree | 1da5948cc12c8261b5e740ce14d3cccee78e0536 | |
| parent | Merge pull request #2119 from FernandoS27/fix-copy (diff) | |
| parent | shader_decompiler: Improve Accuracy of Attribute Interpolation. (diff) | |
| download | yuzu-c07987dfab78e32a14ea3cb883f71323c99ad034.tar.gz yuzu-c07987dfab78e32a14ea3cb883f71323c99ad034.tar.xz yuzu-c07987dfab78e32a14ea3cb883f71323c99ad034.zip | |
Merge pull request #2118 from FernandoS27/ipa-improve
shader_decompiler: Improve Accuracy of Attribute Interpolation.
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 6 | ||||
| -rw-r--r-- | src/video_core/engines/shader_header.h | 41 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 44 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/shader/decode/other.cpp | 15 |
6 files changed, 74 insertions, 38 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 1f425f90b..252592edd 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -376,9 +376,9 @@ enum class R2pMode : u64 { | |||
| 376 | }; | 376 | }; |
| 377 | 377 | ||
| 378 | enum class IpaInterpMode : u64 { | 378 | enum class IpaInterpMode : u64 { |
| 379 | Linear = 0, | 379 | Pass = 0, |
| 380 | Perspective = 1, | 380 | Multiply = 1, |
| 381 | Flat = 2, | 381 | Constant = 2, |
| 382 | Sc = 3, | 382 | Sc = 3, |
| 383 | }; | 383 | }; |
| 384 | 384 | ||
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index cf2b76ff6..e86a7f04a 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h | |||
| @@ -16,6 +16,13 @@ enum class OutputTopology : u32 { | |||
| 16 | TriangleStrip = 7, | 16 | TriangleStrip = 7, |
| 17 | }; | 17 | }; |
| 18 | 18 | ||
| 19 | enum class AttributeUse : u8 { | ||
| 20 | Unused = 0, | ||
| 21 | Constant = 1, | ||
| 22 | Perspective = 2, | ||
| 23 | ScreenLinear = 3, | ||
| 24 | }; | ||
| 25 | |||
| 19 | // Documentation in: | 26 | // Documentation in: |
| 20 | // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture | 27 | // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture |
| 21 | struct Header { | 28 | struct Header { |
| @@ -84,9 +91,15 @@ struct Header { | |||
| 84 | } vtg; | 91 | } vtg; |
| 85 | 92 | ||
| 86 | struct { | 93 | struct { |
| 87 | INSERT_PADDING_BYTES(3); // ImapSystemValuesA | 94 | INSERT_PADDING_BYTES(3); // ImapSystemValuesA |
| 88 | INSERT_PADDING_BYTES(1); // ImapSystemValuesB | 95 | INSERT_PADDING_BYTES(1); // ImapSystemValuesB |
| 89 | INSERT_PADDING_BYTES(32); // ImapGenericVector[32] | 96 | union { |
| 97 | BitField<0, 2, AttributeUse> x; | ||
| 98 | BitField<2, 2, AttributeUse> y; | ||
| 99 | BitField<4, 2, AttributeUse> w; | ||
| 100 | BitField<6, 2, AttributeUse> z; | ||
| 101 | u8 raw; | ||
| 102 | } imap_generic_vector[32]; | ||
| 90 | INSERT_PADDING_BYTES(2); // ImapColor | 103 | INSERT_PADDING_BYTES(2); // ImapColor |
| 91 | INSERT_PADDING_BYTES(2); // ImapSystemValuesC | 104 | INSERT_PADDING_BYTES(2); // ImapSystemValuesC |
| 92 | INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] | 105 | INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] |
| @@ -103,6 +116,28 @@ struct Header { | |||
| 103 | const u32 bit = render_target * 4 + component; | 116 | const u32 bit = render_target * 4 + component; |
| 104 | return omap.target & (1 << bit); | 117 | return omap.target & (1 << bit); |
| 105 | } | 118 | } |
| 119 | AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const { | ||
| 120 | return static_cast<AttributeUse>( | ||
| 121 | (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03); | ||
| 122 | } | ||
| 123 | AttributeUse GetAttributeUse(u32 attribute) const { | ||
| 124 | AttributeUse result = AttributeUse::Unused; | ||
| 125 | for (u32 i = 0; i < 4; i++) { | ||
| 126 | const auto index = GetAttributeIndexUse(attribute, i); | ||
| 127 | if (index == AttributeUse::Unused) { | ||
| 128 | continue; | ||
| 129 | } | ||
| 130 | if (result == AttributeUse::Unused || result == index) { | ||
| 131 | result = index; | ||
| 132 | continue; | ||
| 133 | } | ||
| 134 | LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode"); | ||
| 135 | if (index == AttributeUse::Perspective) { | ||
| 136 | result = index; | ||
| 137 | } | ||
| 138 | } | ||
| 139 | return result; | ||
| 140 | } | ||
| 106 | } ps; | 141 | } ps; |
| 107 | }; | 142 | }; |
| 108 | 143 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index db18f4dbe..72ff6ac6a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | namespace OpenGL::GLShader { | 20 | namespace OpenGL::GLShader { |
| 21 | 21 | ||
| 22 | using Tegra::Shader::Attribute; | 22 | using Tegra::Shader::Attribute; |
| 23 | using Tegra::Shader::AttributeUse; | ||
| 23 | using Tegra::Shader::Header; | 24 | using Tegra::Shader::Header; |
| 24 | using Tegra::Shader::IpaInterpMode; | 25 | using Tegra::Shader::IpaInterpMode; |
| 25 | using Tegra::Shader::IpaMode; | 26 | using Tegra::Shader::IpaMode; |
| @@ -288,34 +289,22 @@ private: | |||
| 288 | code.AddNewLine(); | 289 | code.AddNewLine(); |
| 289 | } | 290 | } |
| 290 | 291 | ||
| 291 | std::string GetInputFlags(const IpaMode& input_mode) { | 292 | std::string GetInputFlags(AttributeUse attribute) { |
| 292 | const IpaSampleMode sample_mode = input_mode.sampling_mode; | ||
| 293 | const IpaInterpMode interp_mode = input_mode.interpolation_mode; | ||
| 294 | std::string out; | 293 | std::string out; |
| 295 | 294 | ||
| 296 | switch (interp_mode) { | 295 | switch (attribute) { |
| 297 | case IpaInterpMode::Flat: | 296 | case AttributeUse::Constant: |
| 298 | out += "flat "; | 297 | out += "flat "; |
| 299 | break; | 298 | break; |
| 300 | case IpaInterpMode::Linear: | 299 | case AttributeUse::ScreenLinear: |
| 301 | out += "noperspective "; | 300 | out += "noperspective "; |
| 302 | break; | 301 | break; |
| 303 | case IpaInterpMode::Perspective: | 302 | case AttributeUse::Perspective: |
| 304 | // Default, Smooth | 303 | // Default, Smooth |
| 305 | break; | 304 | break; |
| 306 | default: | 305 | default: |
| 307 | UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); | 306 | LOG_CRITICAL(HW_GPU, "Unused attribute being fetched"); |
| 308 | } | 307 | UNREACHABLE(); |
| 309 | switch (sample_mode) { | ||
| 310 | case IpaSampleMode::Centroid: | ||
| 311 | // It can be implemented with the "centroid " keyword in GLSL | ||
| 312 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid"); | ||
| 313 | break; | ||
| 314 | case IpaSampleMode::Default: | ||
| 315 | // Default, n/a | ||
| 316 | break; | ||
| 317 | default: | ||
| 318 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode)); | ||
| 319 | } | 308 | } |
| 320 | return out; | 309 | return out; |
| 321 | } | 310 | } |
| @@ -324,16 +313,11 @@ private: | |||
| 324 | const auto& attributes = ir.GetInputAttributes(); | 313 | const auto& attributes = ir.GetInputAttributes(); |
| 325 | for (const auto element : attributes) { | 314 | for (const auto element : attributes) { |
| 326 | const Attribute::Index index = element.first; | 315 | const Attribute::Index index = element.first; |
| 327 | const IpaMode& input_mode = *element.second.begin(); | ||
| 328 | if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { | 316 | if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { |
| 329 | // Skip when it's not a generic attribute | 317 | // Skip when it's not a generic attribute |
| 330 | continue; | 318 | continue; |
| 331 | } | 319 | } |
| 332 | 320 | ||
| 333 | ASSERT(element.second.size() > 0); | ||
| 334 | UNIMPLEMENTED_IF_MSG(element.second.size() > 1, | ||
| 335 | "Multiple input flag modes are not supported in GLSL"); | ||
| 336 | |||
| 337 | // TODO(bunnei): Use proper number of elements for these | 321 | // TODO(bunnei): Use proper number of elements for these |
| 338 | u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | 322 | u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); |
| 339 | if (stage != ShaderStage::Vertex) { | 323 | if (stage != ShaderStage::Vertex) { |
| @@ -345,8 +329,14 @@ private: | |||
| 345 | if (stage == ShaderStage::Geometry) { | 329 | if (stage == ShaderStage::Geometry) { |
| 346 | attr = "gs_" + attr + "[]"; | 330 | attr = "gs_" + attr + "[]"; |
| 347 | } | 331 | } |
| 348 | code.AddLine("layout (location = " + std::to_string(idx) + ") " + | 332 | std::string suffix; |
| 349 | GetInputFlags(input_mode) + "in vec4 " + attr + ';'); | 333 | if (stage == ShaderStage::Fragment) { |
| 334 | const auto input_mode = | ||
| 335 | header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION); | ||
| 336 | suffix = GetInputFlags(input_mode); | ||
| 337 | } | ||
| 338 | code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " + | ||
| 339 | attr + ';'); | ||
| 350 | } | 340 | } |
| 351 | if (!attributes.empty()) | 341 | if (!attributes.empty()) |
| 352 | code.AddNewLine(); | 342 | code.AddNewLine(); |
| @@ -1584,4 +1574,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st | |||
| 1584 | return {decompiler.GetResult(), decompiler.GetShaderEntries()}; | 1574 | return {decompiler.GetResult(), decompiler.GetShaderEntries()}; |
| 1585 | } | 1575 | } |
| 1586 | 1576 | ||
| 1587 | } // namespace OpenGL::GLShader \ No newline at end of file | 1577 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 04e1db911..7d96649af 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5; | |||
| 124 | layout (location = 6) out vec4 FragColor6; | 124 | layout (location = 6) out vec4 FragColor6; |
| 125 | layout (location = 7) out vec4 FragColor7; | 125 | layout (location = 7) out vec4 FragColor7; |
| 126 | 126 | ||
| 127 | layout (location = 0) in vec4 position; | 127 | layout (location = 0) in noperspective vec4 position; |
| 128 | 128 | ||
| 129 | layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | 129 | layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { |
| 130 | vec4 viewport_flip; | 130 | vec4 viewport_flip; |
| @@ -172,4 +172,4 @@ void main() { | |||
| 172 | return {out, program.second}; | 172 | return {out, program.second}; |
| 173 | } | 173 | } |
| 174 | 174 | ||
| 175 | } // namespace OpenGL::GLShader \ No newline at end of file | 175 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 55ec601ff..38f01ca50 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 48 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | 48 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, |
| 49 | "Unaligned attribute loads are not supported"); | 49 | "Unaligned attribute loads are not supported"); |
| 50 | 50 | ||
| 51 | Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, | 51 | Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass, |
| 52 | Tegra::Shader::IpaSampleMode::Default}; | 52 | Tegra::Shader::IpaSampleMode::Default}; |
| 53 | 53 | ||
| 54 | u64 next_element = instr.attribute.fmt20.element; | 54 | u64 next_element = instr.attribute.fmt20.element; |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index f9502e3d0..d750a2936 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 135 | instr.ipa.sample_mode.Value()}; | 135 | instr.ipa.sample_mode.Value()}; |
| 136 | 136 | ||
| 137 | const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); | 137 | const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); |
| 138 | const Node value = GetSaturatedFloat(attr, instr.ipa.saturate); | 138 | Node value = attr; |
| 139 | const Tegra::Shader::Attribute::Index index = attribute.index.Value(); | ||
| 140 | if (index >= Tegra::Shader::Attribute::Index::Attribute_0 && | ||
| 141 | index <= Tegra::Shader::Attribute::Index::Attribute_31) { | ||
| 142 | // TODO(Blinkhawk): There are cases where a perspective attribute uses PASS. | ||
| 143 | // In theory, by setting them as perspective, OpenGL does the perspective correction. | ||
| 144 | // A way must be figured out to reverse the last step of it. | ||
| 145 | if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) { | ||
| 146 | value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20)); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | value = GetSaturatedFloat(value, instr.ipa.saturate); | ||
| 139 | 150 | ||
| 140 | SetRegister(bb, instr.gpr0, value); | 151 | SetRegister(bb, instr.gpr0, value); |
| 141 | break; | 152 | break; |
| @@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 175 | return pc; | 186 | return pc; |
| 176 | } | 187 | } |
| 177 | 188 | ||
| 178 | } // namespace VideoCommon::Shader \ No newline at end of file | 189 | } // namespace VideoCommon::Shader |