diff options
| author | 2017-02-04 13:02:48 -0800 | |
|---|---|---|
| committer | 2017-02-04 13:02:48 -0800 | |
| commit | 97e06b0a0daccd3347ae1bcaf294093b5af32e85 (patch) | |
| tree | 59e1997c90558f58f7368d6974c355e1f20d8f32 /src/video_core/shader | |
| parent | Merge pull request #2414 from yuriks/texture-decode (diff) | |
| parent | VideoCore: Make PrimitiveAssembler const-correct (diff) | |
| download | yuzu-97e06b0a0daccd3347ae1bcaf294093b5af32e85.tar.gz yuzu-97e06b0a0daccd3347ae1bcaf294093b5af32e85.tar.xz yuzu-97e06b0a0daccd3347ae1bcaf294093b5af32e85.zip | |
Merge pull request #2476 from yuriks/shader-refactor3
Oh No! More shader changes!
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/shader.cpp | 63 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 62 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.h | 5 |
4 files changed, 58 insertions, 78 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 2da50bd62..f5f7ea61d 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <cmath> | 5 | #include <cmath> |
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include "common/bit_set.h" | ||
| 7 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 8 | #include "common/microprofile.h" | 9 | #include "common/microprofile.h" |
| 9 | #include "video_core/pica.h" | 10 | #include "video_core/pica.h" |
| @@ -19,38 +20,32 @@ namespace Pica { | |||
| 19 | 20 | ||
| 20 | namespace Shader { | 21 | namespace Shader { |
| 21 | 22 | ||
| 22 | OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, | 23 | OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer& input) { |
| 23 | u32 output_mask) { | ||
| 24 | // Setup output data | 24 | // Setup output data |
| 25 | OutputVertex ret; | 25 | union { |
| 26 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | 26 | OutputVertex ret{}; |
| 27 | // figure out what those circumstances are and enable the remaining outputs then. | 27 | std::array<float24, 24> vertex_slots; |
| 28 | unsigned index = 0; | 28 | }; |
| 29 | for (unsigned i = 0; i < 7; ++i) { | 29 | static_assert(sizeof(vertex_slots) == sizeof(ret), "Struct and array have different sizes."); |
| 30 | 30 | ||
| 31 | if (index >= regs.vs_output_total) | 31 | unsigned int num_attributes = regs.vs_output_total; |
| 32 | break; | 32 | ASSERT(num_attributes <= 7); |
| 33 | for (unsigned int i = 0; i < num_attributes; ++i) { | ||
| 34 | const auto& output_register_map = regs.vs_output_attributes[i]; | ||
| 33 | 35 | ||
| 34 | if ((output_mask & (1 << i)) == 0) | 36 | Regs::VSOutputAttributes::Semantic semantics[4] = { |
| 35 | continue; | 37 | output_register_map.map_x, output_register_map.map_y, output_register_map.map_z, |
| 36 | 38 | output_register_map.map_w}; | |
| 37 | const auto& output_register_map = regs.vs_output_attributes[index]; | ||
| 38 | |||
| 39 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, | ||
| 40 | output_register_map.map_z, output_register_map.map_w}; | ||
| 41 | 39 | ||
| 42 | for (unsigned comp = 0; comp < 4; ++comp) { | 40 | for (unsigned comp = 0; comp < 4; ++comp) { |
| 43 | float24* out = ((float24*)&ret) + semantics[comp]; | 41 | Regs::VSOutputAttributes::Semantic semantic = semantics[comp]; |
| 44 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | 42 | float24* out = &vertex_slots[semantic]; |
| 45 | *out = output_regs[i][comp]; | 43 | if (semantic < vertex_slots.size()) { |
| 46 | } else { | 44 | *out = input.attr[i][comp]; |
| 47 | // Zero output so that attributes which aren't output won't have denormals in them, | 45 | } else if (semantic != Regs::VSOutputAttributes::INVALID) { |
| 48 | // which would slow us down later. | 46 | LOG_ERROR(HW_GPU, "Invalid/unknown semantic id: %u", (unsigned int)semantic); |
| 49 | memset(out, 0, sizeof(*out)); | ||
| 50 | } | 47 | } |
| 51 | } | 48 | } |
| 52 | |||
| 53 | index++; | ||
| 54 | } | 49 | } |
| 55 | 50 | ||
| 56 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing | 51 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing |
| @@ -71,12 +66,20 @@ OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], co | |||
| 71 | return ret; | 66 | return ret; |
| 72 | } | 67 | } |
| 73 | 68 | ||
| 74 | void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { | 69 | void UnitState::LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input) { |
| 75 | // Setup input register table | 70 | const unsigned max_attribute = config.max_input_attribute_index; |
| 76 | const auto& attribute_register_map = g_state.regs.vs.input_register_map; | ||
| 77 | 71 | ||
| 78 | for (int i = 0; i < num_attributes; i++) | 72 | for (unsigned attr = 0; attr <= max_attribute; ++attr) { |
| 79 | registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | 73 | unsigned reg = config.GetRegisterForAttribute(attr); |
| 74 | registers.input[reg] = input.attr[attr]; | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | void UnitState::WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output) { | ||
| 79 | unsigned int output_i = 0; | ||
| 80 | for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) { | ||
| 81 | output.attr[output_i++] = registers.output[reg]; | ||
| 82 | } | ||
| 80 | } | 83 | } |
| 81 | 84 | ||
| 82 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | 85 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 44d9f76c3..b188d3edf 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -23,14 +23,11 @@ namespace Pica { | |||
| 23 | 23 | ||
| 24 | namespace Shader { | 24 | namespace Shader { |
| 25 | 25 | ||
| 26 | struct InputVertex { | 26 | struct AttributeBuffer { |
| 27 | alignas(16) Math::Vec4<float24> attr[16]; | 27 | alignas(16) Math::Vec4<float24> attr[16]; |
| 28 | }; | 28 | }; |
| 29 | 29 | ||
| 30 | struct OutputVertex { | 30 | struct OutputVertex { |
| 31 | OutputVertex() = default; | ||
| 32 | |||
| 33 | // VS output attributes | ||
| 34 | Math::Vec4<float24> pos; | 31 | Math::Vec4<float24> pos; |
| 35 | Math::Vec4<float24> quat; | 32 | Math::Vec4<float24> quat; |
| 36 | Math::Vec4<float24> color; | 33 | Math::Vec4<float24> color; |
| @@ -42,43 +39,22 @@ struct OutputVertex { | |||
| 42 | INSERT_PADDING_WORDS(1); | 39 | INSERT_PADDING_WORDS(1); |
| 43 | Math::Vec2<float24> tc2; | 40 | Math::Vec2<float24> tc2; |
| 44 | 41 | ||
| 45 | // Padding for optimal alignment | 42 | static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output); |
| 46 | INSERT_PADDING_WORDS(4); | ||
| 47 | |||
| 48 | // Attributes used to store intermediate results | ||
| 49 | |||
| 50 | // position after perspective divide | ||
| 51 | Math::Vec3<float24> screenpos; | ||
| 52 | INSERT_PADDING_WORDS(1); | ||
| 53 | |||
| 54 | // Linear interpolation | ||
| 55 | // factor: 0=this, 1=vtx | ||
| 56 | void Lerp(float24 factor, const OutputVertex& vtx) { | ||
| 57 | pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); | ||
| 58 | |||
| 59 | // TODO: Should perform perspective correct interpolation here... | ||
| 60 | tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); | ||
| 61 | tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); | ||
| 62 | tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); | ||
| 63 | |||
| 64 | screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); | ||
| 65 | |||
| 66 | color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); | ||
| 67 | } | ||
| 68 | |||
| 69 | // Linear interpolation | ||
| 70 | // factor: 0=v0, 1=v1 | ||
| 71 | static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { | ||
| 72 | OutputVertex ret = v0; | ||
| 73 | ret.Lerp(factor, v1); | ||
| 74 | return ret; | ||
| 75 | } | ||
| 76 | |||
| 77 | static OutputVertex FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, | ||
| 78 | u32 output_mask); | ||
| 79 | }; | 43 | }; |
| 44 | #define ASSERT_POS(var, pos) \ | ||
| 45 | static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ | ||
| 46 | "offset.") | ||
| 47 | ASSERT_POS(pos, Regs::VSOutputAttributes::POSITION_X); | ||
| 48 | ASSERT_POS(quat, Regs::VSOutputAttributes::QUATERNION_X); | ||
| 49 | ASSERT_POS(color, Regs::VSOutputAttributes::COLOR_R); | ||
| 50 | ASSERT_POS(tc0, Regs::VSOutputAttributes::TEXCOORD0_U); | ||
| 51 | ASSERT_POS(tc1, Regs::VSOutputAttributes::TEXCOORD1_U); | ||
| 52 | ASSERT_POS(tc0_w, Regs::VSOutputAttributes::TEXCOORD0_W); | ||
| 53 | ASSERT_POS(view, Regs::VSOutputAttributes::VIEW_X); | ||
| 54 | ASSERT_POS(tc2, Regs::VSOutputAttributes::TEXCOORD2_U); | ||
| 55 | #undef ASSERT_POS | ||
| 80 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 56 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 81 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 57 | static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); |
| 82 | 58 | ||
| 83 | /** | 59 | /** |
| 84 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 60 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| @@ -137,10 +113,12 @@ struct UnitState { | |||
| 137 | /** | 113 | /** |
| 138 | * Loads the unit state with an input vertex. | 114 | * Loads the unit state with an input vertex. |
| 139 | * | 115 | * |
| 140 | * @param input Input vertex into the shader | 116 | * @param config Shader configuration registers corresponding to the unit. |
| 141 | * @param num_attributes The number of vertex shader attributes to load | 117 | * @param input Attribute buffer to load into the input registers. |
| 142 | */ | 118 | */ |
| 143 | void LoadInputVertex(const InputVertex& input, int num_attributes); | 119 | void LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input); |
| 120 | |||
| 121 | void WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output); | ||
| 144 | }; | 122 | }; |
| 145 | 123 | ||
| 146 | struct ShaderSetup { | 124 | struct ShaderSetup { |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index c0c89b857..81522b8f5 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -668,14 +668,14 @@ void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const { | |||
| 668 | } | 668 | } |
| 669 | 669 | ||
| 670 | DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, | 670 | DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, |
| 671 | const InputVertex& input, | 671 | const AttributeBuffer& input, |
| 672 | int num_attributes) const { | 672 | const Regs::ShaderConfig& config) const { |
| 673 | UnitState state; | 673 | UnitState state; |
| 674 | DebugData<true> debug_data; | 674 | DebugData<true> debug_data; |
| 675 | 675 | ||
| 676 | // Setup input register table | 676 | // Setup input register table |
| 677 | boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); | 677 | boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); |
| 678 | state.LoadInputVertex(input, num_attributes); | 678 | state.LoadInput(config, input); |
| 679 | RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); | 679 | RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); |
| 680 | return debug_data; | 680 | return debug_data; |
| 681 | } | 681 | } |
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index d6c0e2d8c..d7a61e122 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h | |||
| @@ -19,12 +19,11 @@ public: | |||
| 19 | /** | 19 | /** |
| 20 | * Produce debug information based on the given shader and input vertex | 20 | * Produce debug information based on the given shader and input vertex |
| 21 | * @param input Input vertex into the shader | 21 | * @param input Input vertex into the shader |
| 22 | * @param num_attributes The number of vertex shader attributes | ||
| 23 | * @param config Configuration object for the shader pipeline | 22 | * @param config Configuration object for the shader pipeline |
| 24 | * @return Debug information for this shader with regards to the given vertex | 23 | * @return Debug information for this shader with regards to the given vertex |
| 25 | */ | 24 | */ |
| 26 | DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input, | 25 | DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input, |
| 27 | int num_attributes) const; | 26 | const Regs::ShaderConfig& config) const; |
| 28 | }; | 27 | }; |
| 29 | 28 | ||
| 30 | } // namespace | 29 | } // namespace |