diff options
| author | 2016-12-17 14:38:03 -0800 | |
|---|---|---|
| committer | 2017-01-25 18:53:25 -0800 | |
| commit | 6fa3687afc97685101f9ee5c65cf98f505980695 (patch) | |
| tree | eb8c3927526cff06dbf9676499ca2e9fc11eda02 /src | |
| parent | Shader: Initialize conditional_code in interpreter (diff) | |
| download | yuzu-6fa3687afc97685101f9ee5c65cf98f505980695.tar.gz yuzu-6fa3687afc97685101f9ee5c65cf98f505980695.tar.xz yuzu-6fa3687afc97685101f9ee5c65cf98f505980695.zip | |
Shader: Remove OutputRegisters struct
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/command_processor.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 11 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 17 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 4 |
4 files changed, 17 insertions, 22 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 694c9f169..66d19cba0 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -152,8 +152,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 152 | Shader::UnitState shader_unit; | 152 | Shader::UnitState shader_unit; |
| 153 | shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); | 153 | shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); |
| 154 | shader_engine->Run(shader_unit, regs.vs.main_offset); | 154 | shader_engine->Run(shader_unit, regs.vs.main_offset); |
| 155 | Shader::OutputVertex output_vertex = | 155 | auto output_vertex = Shader::OutputVertex::FromRegisters( |
| 156 | shader_unit.output_registers.ToVertex(regs.vs); | 156 | shader_unit.registers.output, regs, regs.vs.output_mask); |
| 157 | 157 | ||
| 158 | // Send to renderer | 158 | // Send to renderer |
| 159 | using Pica::Shader::OutputVertex; | 159 | using Pica::Shader::OutputVertex; |
| @@ -291,7 +291,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 291 | shader_engine->Run(shader_unit, regs.vs.main_offset); | 291 | shader_engine->Run(shader_unit, regs.vs.main_offset); |
| 292 | 292 | ||
| 293 | // Retrieve vertex from register data | 293 | // Retrieve vertex from register data |
| 294 | output_vertex = shader_unit.output_registers.ToVertex(regs.vs); | 294 | output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output, |
| 295 | regs, regs.vs.output_mask); | ||
| 295 | 296 | ||
| 296 | if (is_indexed) { | 297 | if (is_indexed) { |
| 297 | vertex_cache[vertex_cache_pos] = output_vertex; | 298 | vertex_cache[vertex_cache_pos] = output_vertex; |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 1662b5d38..2da50bd62 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -19,7 +19,8 @@ namespace Pica { | |||
| 19 | 19 | ||
| 20 | namespace Shader { | 20 | namespace Shader { |
| 21 | 21 | ||
| 22 | OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { | 22 | OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, |
| 23 | u32 output_mask) { | ||
| 23 | // Setup output data | 24 | // Setup output data |
| 24 | OutputVertex ret; | 25 | OutputVertex ret; |
| 25 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | 26 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to |
| @@ -27,13 +28,13 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { | |||
| 27 | unsigned index = 0; | 28 | unsigned index = 0; |
| 28 | for (unsigned i = 0; i < 7; ++i) { | 29 | for (unsigned i = 0; i < 7; ++i) { |
| 29 | 30 | ||
| 30 | if (index >= g_state.regs.vs_output_total) | 31 | if (index >= regs.vs_output_total) |
| 31 | break; | 32 | break; |
| 32 | 33 | ||
| 33 | if ((config.output_mask & (1 << i)) == 0) | 34 | if ((output_mask & (1 << i)) == 0) |
| 34 | continue; | 35 | continue; |
| 35 | 36 | ||
| 36 | const auto& output_register_map = g_state.regs.vs_output_attributes[index]; | 37 | const auto& output_register_map = regs.vs_output_attributes[index]; |
| 37 | 38 | ||
| 38 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, | 39 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, |
| 39 | output_register_map.map_z, output_register_map.map_w}; | 40 | output_register_map.map_z, output_register_map.map_w}; |
| @@ -41,7 +42,7 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { | |||
| 41 | for (unsigned comp = 0; comp < 4; ++comp) { | 42 | for (unsigned comp = 0; comp < 4; ++comp) { |
| 42 | float24* out = ((float24*)&ret) + semantics[comp]; | 43 | float24* out = ((float24*)&ret) + semantics[comp]; |
| 43 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | 44 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { |
| 44 | *out = value[i][comp]; | 45 | *out = output_regs[i][comp]; |
| 45 | } else { | 46 | } else { |
| 46 | // Zero output so that attributes which aren't output won't have denormals in them, | 47 | // Zero output so that attributes which aren't output won't have denormals in them, |
| 47 | // which would slow us down later. | 48 | // which would slow us down later. |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 9d2410487..7d51d0044 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -73,19 +73,13 @@ struct OutputVertex { | |||
| 73 | ret.Lerp(factor, v1); | 73 | ret.Lerp(factor, v1); |
| 74 | return ret; | 74 | return ret; |
| 75 | } | 75 | } |
| 76 | |||
| 77 | static OutputVertex FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, | ||
| 78 | u32 output_mask); | ||
| 76 | }; | 79 | }; |
| 77 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 80 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 78 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 81 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
| 79 | 82 | ||
| 80 | struct OutputRegisters { | ||
| 81 | OutputRegisters() = default; | ||
| 82 | |||
| 83 | alignas(16) Math::Vec4<float24> value[16]; | ||
| 84 | |||
| 85 | OutputVertex ToVertex(const Regs::ShaderConfig& config) const; | ||
| 86 | }; | ||
| 87 | static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); | ||
| 88 | |||
| 89 | /** | 83 | /** |
| 90 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 84 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| 91 | * has four shader units that process shaders in parallel. At the present, Citra only implements a | 85 | * has four shader units that process shaders in parallel. At the present, Citra only implements a |
| @@ -98,11 +92,10 @@ struct UnitState { | |||
| 98 | // required to be 16-byte aligned. | 92 | // required to be 16-byte aligned. |
| 99 | alignas(16) Math::Vec4<float24> input[16]; | 93 | alignas(16) Math::Vec4<float24> input[16]; |
| 100 | alignas(16) Math::Vec4<float24> temporary[16]; | 94 | alignas(16) Math::Vec4<float24> temporary[16]; |
| 95 | alignas(16) Math::Vec4<float24> output[16]; | ||
| 101 | } registers; | 96 | } registers; |
| 102 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); | 97 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); |
| 103 | 98 | ||
| 104 | OutputRegisters output_registers; | ||
| 105 | |||
| 106 | bool conditional_code[2]; | 99 | bool conditional_code[2]; |
| 107 | 100 | ||
| 108 | // Two Address registers and one loop counter | 101 | // Two Address registers and one loop counter |
| @@ -128,7 +121,7 @@ struct UnitState { | |||
| 128 | static size_t OutputOffset(const DestRegister& reg) { | 121 | static size_t OutputOffset(const DestRegister& reg) { |
| 129 | switch (reg.GetRegisterType()) { | 122 | switch (reg.GetRegisterType()) { |
| 130 | case RegisterType::Output: | 123 | case RegisterType::Output: |
| 131 | return offsetof(UnitState, output_registers.value) + | 124 | return offsetof(UnitState, registers.output) + |
| 132 | reg.GetIndex() * sizeof(Math::Vec4<float24>); | 125 | reg.GetIndex() * sizeof(Math::Vec4<float24>); |
| 133 | 126 | ||
| 134 | case RegisterType::Temporary: | 127 | case RegisterType::Temporary: |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index ecc227089..a6197c10a 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -175,7 +175,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | |||
| 175 | 175 | ||
| 176 | float24* dest = | 176 | float24* dest = |
| 177 | (instr.common.dest.Value() < 0x10) | 177 | (instr.common.dest.Value() < 0x10) |
| 178 | ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] | 178 | ? &state.registers.output[instr.common.dest.Value().GetIndex()][0] |
| 179 | : (instr.common.dest.Value() < 0x20) | 179 | : (instr.common.dest.Value() < 0x20) |
| 180 | ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] | 180 | ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] |
| 181 | : dummy_vec4_float24; | 181 | : dummy_vec4_float24; |
| @@ -518,7 +518,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | |||
| 518 | 518 | ||
| 519 | float24* dest = | 519 | float24* dest = |
| 520 | (instr.mad.dest.Value() < 0x10) | 520 | (instr.mad.dest.Value() < 0x10) |
| 521 | ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] | 521 | ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0] |
| 522 | : (instr.mad.dest.Value() < 0x20) | 522 | : (instr.mad.dest.Value() < 0x20) |
| 523 | ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | 523 | ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] |
| 524 | : dummy_vec4_float24; | 524 | : dummy_vec4_float24; |