diff options
| author | 2016-12-18 17:58:30 -0800 | |
|---|---|---|
| committer | 2017-01-29 21:31:37 -0800 | |
| commit | 92bf5c88e6f85ebeef161a0056c86c66bc25c6e7 (patch) | |
| tree | 984a05367d1cde9249bbd962817ebbbcd58813a9 /src | |
| parent | VideoCore: Consistently use shader configuration to load attributes (diff) | |
| download | yuzu-92bf5c88e6f85ebeef161a0056c86c66bc25c6e7.tar.gz yuzu-92bf5c88e6f85ebeef161a0056c86c66bc25c6e7.tar.xz yuzu-92bf5c88e6f85ebeef161a0056c86c66bc25c6e7.zip | |
VideoCore: Split shader output writing from semantic loading
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/command_processor.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 29 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 5 |
3 files changed, 24 insertions, 24 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index fef0b4ceb..4955ff9f9 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
| @@ -151,10 +151,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 151 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, | 151 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, |
| 152 | static_cast<void*>(&immediate_input)); | 152 | static_cast<void*>(&immediate_input)); |
| 153 | Shader::UnitState shader_unit; | 153 | Shader::UnitState shader_unit; |
| 154 | Shader::AttributeBuffer output{}; | ||
| 155 | |||
| 154 | shader_unit.LoadInput(regs.vs, immediate_input); | 156 | shader_unit.LoadInput(regs.vs, immediate_input); |
| 155 | shader_engine->Run(g_state.vs, shader_unit); | 157 | shader_engine->Run(g_state.vs, shader_unit); |
| 156 | auto output_vertex = Shader::OutputVertex::FromRegisters( | 158 | shader_unit.WriteOutput(regs.vs, output); |
| 157 | shader_unit.registers.output, regs, regs.vs.output_mask); | ||
| 158 | 159 | ||
| 159 | // Send to renderer | 160 | // Send to renderer |
| 160 | using Pica::Shader::OutputVertex; | 161 | using Pica::Shader::OutputVertex; |
| @@ -163,7 +164,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 163 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | 164 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); |
| 164 | }; | 165 | }; |
| 165 | 166 | ||
| 166 | g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); | 167 | g_state.primitive_assembler.SubmitVertex( |
| 168 | Shader::OutputVertex::FromAttributeBuffer(regs, output), AddTriangle); | ||
| 167 | } | 169 | } |
| 168 | } | 170 | } |
| 169 | } | 171 | } |
| @@ -281,7 +283,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 281 | 283 | ||
| 282 | if (!vertex_cache_hit) { | 284 | if (!vertex_cache_hit) { |
| 283 | // Initialize data for the current vertex | 285 | // Initialize data for the current vertex |
| 284 | Shader::AttributeBuffer input; | 286 | Shader::AttributeBuffer input, output{}; |
| 285 | loader.LoadVertex(base_address, index, vertex, input, memory_accesses); | 287 | loader.LoadVertex(base_address, index, vertex, input, memory_accesses); |
| 286 | 288 | ||
| 287 | // Send to vertex shader | 289 | // Send to vertex shader |
| @@ -290,10 +292,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 290 | (void*)&input); | 292 | (void*)&input); |
| 291 | shader_unit.LoadInput(regs.vs, input); | 293 | shader_unit.LoadInput(regs.vs, input); |
| 292 | shader_engine->Run(g_state.vs, shader_unit); | 294 | shader_engine->Run(g_state.vs, shader_unit); |
| 295 | shader_unit.WriteOutput(regs.vs, output); | ||
| 293 | 296 | ||
| 294 | // Retrieve vertex from register data | 297 | // Retrieve vertex from register data |
| 295 | output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output, | 298 | output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs, output); |
| 296 | regs, regs.vs.output_mask); | ||
| 297 | 299 | ||
| 298 | if (is_indexed) { | 300 | if (is_indexed) { |
| 299 | vertex_cache[vertex_cache_pos] = output_vertex; | 301 | vertex_cache[vertex_cache_pos] = output_vertex; |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index dbad167e9..99a22c2dd 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <cmath> | 5 | #include <cmath> |
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include "common/bit_set.h" | ||
| 7 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 8 | #include "common/microprofile.h" | 9 | #include "common/microprofile.h" |
| 9 | #include "video_core/pica.h" | 10 | #include "video_core/pica.h" |
| @@ -19,22 +20,13 @@ namespace Pica { | |||
| 19 | 20 | ||
| 20 | namespace Shader { | 21 | namespace Shader { |
| 21 | 22 | ||
| 22 | OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, | 23 | OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer& input) { |
| 23 | u32 output_mask) { | ||
| 24 | // Setup output data | 24 | // Setup output data |
| 25 | OutputVertex ret; | 25 | OutputVertex ret; |
| 26 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | ||
| 27 | // figure out what those circumstances are and enable the remaining outputs then. | ||
| 28 | unsigned index = 0; | ||
| 29 | for (unsigned i = 0; i < 7; ++i) { | ||
| 30 | 26 | ||
| 31 | if (index >= regs.vs_output_total) | 27 | unsigned int num_attributes = regs.vs_output_total; |
| 32 | break; | 28 | for (unsigned int i = 0; i < num_attributes; ++i) { |
| 33 | 29 | const auto& output_register_map = regs.vs_output_attributes[i]; | |
| 34 | if ((output_mask & (1 << i)) == 0) | ||
| 35 | continue; | ||
| 36 | |||
| 37 | const auto& output_register_map = regs.vs_output_attributes[index]; | ||
| 38 | 30 | ||
| 39 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, | 31 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, |
| 40 | output_register_map.map_z, output_register_map.map_w}; | 32 | output_register_map.map_z, output_register_map.map_w}; |
| @@ -42,15 +34,13 @@ OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], co | |||
| 42 | for (unsigned comp = 0; comp < 4; ++comp) { | 34 | for (unsigned comp = 0; comp < 4; ++comp) { |
| 43 | float24* out = ((float24*)&ret) + semantics[comp]; | 35 | float24* out = ((float24*)&ret) + semantics[comp]; |
| 44 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | 36 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { |
| 45 | *out = output_regs[i][comp]; | 37 | *out = input.attr[i][comp]; |
| 46 | } else { | 38 | } else { |
| 47 | // Zero output so that attributes which aren't output won't have denormals in them, | 39 | // Zero output so that attributes which aren't output won't have denormals in them, |
| 48 | // which would slow us down later. | 40 | // which would slow us down later. |
| 49 | memset(out, 0, sizeof(*out)); | 41 | memset(out, 0, sizeof(*out)); |
| 50 | } | 42 | } |
| 51 | } | 43 | } |
| 52 | |||
| 53 | index++; | ||
| 54 | } | 44 | } |
| 55 | 45 | ||
| 56 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing | 46 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing |
| @@ -80,6 +70,13 @@ void UnitState::LoadInput(const Regs::ShaderConfig& config, const AttributeBuffe | |||
| 80 | } | 70 | } |
| 81 | } | 71 | } |
| 82 | 72 | ||
| 73 | void UnitState::WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output) { | ||
| 74 | unsigned int output_i = 0; | ||
| 75 | for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) { | ||
| 76 | output.attr[output_i++] = registers.output[reg]; | ||
| 77 | } | ||
| 78 | } | ||
| 79 | |||
| 83 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | 80 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); |
| 84 | 81 | ||
| 85 | #ifdef ARCHITECTURE_x86_64 | 82 | #ifdef ARCHITECTURE_x86_64 |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 43a8b848c..00bd723cf 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
| @@ -74,8 +74,7 @@ struct OutputVertex { | |||
| 74 | return ret; | 74 | return ret; |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | static OutputVertex FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, | 77 | static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output); |
| 78 | u32 output_mask); | ||
| 79 | }; | 78 | }; |
| 80 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 79 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 81 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 80 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
| @@ -141,6 +140,8 @@ struct UnitState { | |||
| 141 | * @param input Attribute buffer to load into the input registers. | 140 | * @param input Attribute buffer to load into the input registers. |
| 142 | */ | 141 | */ |
| 143 | void LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input); | 142 | void LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input); |
| 143 | |||
| 144 | void WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output); | ||
| 144 | }; | 145 | }; |
| 145 | 146 | ||
| 146 | struct ShaderSetup { | 147 | struct ShaderSetup { |