summaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
authorGravatar Yuri Kunde Schlesner2017-02-04 13:02:48 -0800
committerGravatar GitHub2017-02-04 13:02:48 -0800
commit97e06b0a0daccd3347ae1bcaf294093b5af32e85 (patch)
tree59e1997c90558f58f7368d6974c355e1f20d8f32 /src/video_core/shader
parentMerge pull request #2414 from yuriks/texture-decode (diff)
parentVideoCore: Make PrimitiveAssembler const-correct (diff)
downloadyuzu-97e06b0a0daccd3347ae1bcaf294093b5af32e85.tar.gz
yuzu-97e06b0a0daccd3347ae1bcaf294093b5af32e85.tar.xz
yuzu-97e06b0a0daccd3347ae1bcaf294093b5af32e85.zip
Merge pull request #2476 from yuriks/shader-refactor3
Oh No! More shader changes!
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/shader.cpp63
-rw-r--r--src/video_core/shader/shader.h62
-rw-r--r--src/video_core/shader/shader_interpreter.cpp6
-rw-r--r--src/video_core/shader/shader_interpreter.h5
4 files changed, 58 insertions, 78 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 2da50bd62..f5f7ea61d 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -4,6 +4,7 @@
4 4
5#include <cmath> 5#include <cmath>
6#include <cstring> 6#include <cstring>
7#include "common/bit_set.h"
7#include "common/logging/log.h" 8#include "common/logging/log.h"
8#include "common/microprofile.h" 9#include "common/microprofile.h"
9#include "video_core/pica.h" 10#include "video_core/pica.h"
@@ -19,38 +20,32 @@ namespace Pica {
19 20
20namespace Shader { 21namespace Shader {
21 22
22OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, 23OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer& input) {
23 u32 output_mask) {
24 // Setup output data 24 // Setup output data
25 OutputVertex ret; 25 union {
26 // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to 26 OutputVertex ret{};
27 // figure out what those circumstances are and enable the remaining outputs then. 27 std::array<float24, 24> vertex_slots;
28 unsigned index = 0; 28 };
29 for (unsigned i = 0; i < 7; ++i) { 29 static_assert(sizeof(vertex_slots) == sizeof(ret), "Struct and array have different sizes.");
30 30
31 if (index >= regs.vs_output_total) 31 unsigned int num_attributes = regs.vs_output_total;
32 break; 32 ASSERT(num_attributes <= 7);
33 for (unsigned int i = 0; i < num_attributes; ++i) {
34 const auto& output_register_map = regs.vs_output_attributes[i];
33 35
34 if ((output_mask & (1 << i)) == 0) 36 Regs::VSOutputAttributes::Semantic semantics[4] = {
35 continue; 37 output_register_map.map_x, output_register_map.map_y, output_register_map.map_z,
36 38 output_register_map.map_w};
37 const auto& output_register_map = regs.vs_output_attributes[index];
38
39 u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y,
40 output_register_map.map_z, output_register_map.map_w};
41 39
42 for (unsigned comp = 0; comp < 4; ++comp) { 40 for (unsigned comp = 0; comp < 4; ++comp) {
43 float24* out = ((float24*)&ret) + semantics[comp]; 41 Regs::VSOutputAttributes::Semantic semantic = semantics[comp];
44 if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { 42 float24* out = &vertex_slots[semantic];
45 *out = output_regs[i][comp]; 43 if (semantic < vertex_slots.size()) {
46 } else { 44 *out = input.attr[i][comp];
47 // Zero output so that attributes which aren't output won't have denormals in them, 45 } else if (semantic != Regs::VSOutputAttributes::INVALID) {
48 // which would slow us down later. 46 LOG_ERROR(HW_GPU, "Invalid/unknown semantic id: %u", (unsigned int)semantic);
49 memset(out, 0, sizeof(*out));
50 } 47 }
51 } 48 }
52
53 index++;
54 } 49 }
55 50
56 // The hardware takes the absolute and saturates vertex colors like this, *before* doing 51 // The hardware takes the absolute and saturates vertex colors like this, *before* doing
@@ -71,12 +66,20 @@ OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], co
71 return ret; 66 return ret;
72} 67}
73 68
74void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { 69void UnitState::LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input) {
75 // Setup input register table 70 const unsigned max_attribute = config.max_input_attribute_index;
76 const auto& attribute_register_map = g_state.regs.vs.input_register_map;
77 71
78 for (int i = 0; i < num_attributes; i++) 72 for (unsigned attr = 0; attr <= max_attribute; ++attr) {
79 registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; 73 unsigned reg = config.GetRegisterForAttribute(attr);
74 registers.input[reg] = input.attr[attr];
75 }
76}
77
78void UnitState::WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output) {
79 unsigned int output_i = 0;
80 for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) {
81 output.attr[output_i++] = registers.output[reg];
82 }
80} 83}
81 84
82MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); 85MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 44d9f76c3..b188d3edf 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -23,14 +23,11 @@ namespace Pica {
23 23
24namespace Shader { 24namespace Shader {
25 25
26struct InputVertex { 26struct AttributeBuffer {
27 alignas(16) Math::Vec4<float24> attr[16]; 27 alignas(16) Math::Vec4<float24> attr[16];
28}; 28};
29 29
30struct OutputVertex { 30struct OutputVertex {
31 OutputVertex() = default;
32
33 // VS output attributes
34 Math::Vec4<float24> pos; 31 Math::Vec4<float24> pos;
35 Math::Vec4<float24> quat; 32 Math::Vec4<float24> quat;
36 Math::Vec4<float24> color; 33 Math::Vec4<float24> color;
@@ -42,43 +39,22 @@ struct OutputVertex {
42 INSERT_PADDING_WORDS(1); 39 INSERT_PADDING_WORDS(1);
43 Math::Vec2<float24> tc2; 40 Math::Vec2<float24> tc2;
44 41
45 // Padding for optimal alignment 42 static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output);
46 INSERT_PADDING_WORDS(4);
47
48 // Attributes used to store intermediate results
49
50 // position after perspective divide
51 Math::Vec3<float24> screenpos;
52 INSERT_PADDING_WORDS(1);
53
54 // Linear interpolation
55 // factor: 0=this, 1=vtx
56 void Lerp(float24 factor, const OutputVertex& vtx) {
57 pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
58
59 // TODO: Should perform perspective correct interpolation here...
60 tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
61 tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
62 tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
63
64 screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
65
66 color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
67 }
68
69 // Linear interpolation
70 // factor: 0=v0, 1=v1
71 static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) {
72 OutputVertex ret = v0;
73 ret.Lerp(factor, v1);
74 return ret;
75 }
76
77 static OutputVertex FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs,
78 u32 output_mask);
79}; 43};
44#define ASSERT_POS(var, pos) \
45 static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \
46 "offset.")
47ASSERT_POS(pos, Regs::VSOutputAttributes::POSITION_X);
48ASSERT_POS(quat, Regs::VSOutputAttributes::QUATERNION_X);
49ASSERT_POS(color, Regs::VSOutputAttributes::COLOR_R);
50ASSERT_POS(tc0, Regs::VSOutputAttributes::TEXCOORD0_U);
51ASSERT_POS(tc1, Regs::VSOutputAttributes::TEXCOORD1_U);
52ASSERT_POS(tc0_w, Regs::VSOutputAttributes::TEXCOORD0_W);
53ASSERT_POS(view, Regs::VSOutputAttributes::VIEW_X);
54ASSERT_POS(tc2, Regs::VSOutputAttributes::TEXCOORD2_U);
55#undef ASSERT_POS
80static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); 56static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
81static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); 57static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size");
82 58
83/** 59/**
84 * This structure contains the state information that needs to be unique for a shader unit. The 3DS 60 * This structure contains the state information that needs to be unique for a shader unit. The 3DS
@@ -137,10 +113,12 @@ struct UnitState {
137 /** 113 /**
138 * Loads the unit state with an input vertex. 114 * Loads the unit state with an input vertex.
139 * 115 *
140 * @param input Input vertex into the shader 116 * @param config Shader configuration registers corresponding to the unit.
141 * @param num_attributes The number of vertex shader attributes to load 117 * @param input Attribute buffer to load into the input registers.
142 */ 118 */
143 void LoadInputVertex(const InputVertex& input, int num_attributes); 119 void LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input);
120
121 void WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output);
144}; 122};
145 123
146struct ShaderSetup { 124struct ShaderSetup {
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index c0c89b857..81522b8f5 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -668,14 +668,14 @@ void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const {
668} 668}
669 669
670DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, 670DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup,
671 const InputVertex& input, 671 const AttributeBuffer& input,
672 int num_attributes) const { 672 const Regs::ShaderConfig& config) const {
673 UnitState state; 673 UnitState state;
674 DebugData<true> debug_data; 674 DebugData<true> debug_data;
675 675
676 // Setup input register table 676 // Setup input register table
677 boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); 677 boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
678 state.LoadInputVertex(input, num_attributes); 678 state.LoadInput(config, input);
679 RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); 679 RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point);
680 return debug_data; 680 return debug_data;
681} 681}
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index d6c0e2d8c..d7a61e122 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -19,12 +19,11 @@ public:
19 /** 19 /**
20 * Produce debug information based on the given shader and input vertex 20 * Produce debug information based on the given shader and input vertex
21 * @param input Input vertex into the shader 21 * @param input Input vertex into the shader
22 * @param num_attributes The number of vertex shader attributes
23 * @param config Configuration object for the shader pipeline 22 * @param config Configuration object for the shader pipeline
24 * @return Debug information for this shader with regards to the given vertex 23 * @return Debug information for this shader with regards to the given vertex
25 */ 24 */
26 DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input, 25 DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input,
27 int num_attributes) const; 26 const Regs::ShaderConfig& config) const;
28}; 27};
29 28
30} // namespace 29} // namespace