diff options
| author | 2017-02-04 13:02:48 -0800 | |
|---|---|---|
| committer | 2017-02-04 13:02:48 -0800 | |
| commit | 97e06b0a0daccd3347ae1bcaf294093b5af32e85 (patch) | |
| tree | 59e1997c90558f58f7368d6974c355e1f20d8f32 /src | |
| parent | Merge pull request #2414 from yuriks/texture-decode (diff) | |
| parent | VideoCore: Make PrimitiveAssembler const-correct (diff) | |
| download | yuzu-97e06b0a0daccd3347ae1bcaf294093b5af32e85.tar.gz yuzu-97e06b0a0daccd3347ae1bcaf294093b5af32e85.tar.xz yuzu-97e06b0a0daccd3347ae1bcaf294093b5af32e85.zip | |
Merge pull request #2476 from yuriks/shader-refactor3
Oh No! More shader changes!
Diffstat (limited to 'src')
| -rw-r--r-- | src/citra_qt/debugger/graphics/graphics_tracing.cpp | 4 | ||||
| -rw-r--r-- | src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp | 6 | ||||
| -rw-r--r-- | src/citra_qt/debugger/graphics/graphics_vertex_shader.h | 2 | ||||
| -rw-r--r-- | src/common/bit_set.h | 33 | ||||
| -rw-r--r-- | src/video_core/clipper.cpp | 24 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 33 | ||||
| -rw-r--r-- | src/video_core/pica.h | 57 | ||||
| -rw-r--r-- | src/video_core/pica_state.h | 4 | ||||
| -rw-r--r-- | src/video_core/primitive_assembly.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/primitive_assembly.h | 5 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/rasterizer.h | 40 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 2 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 63 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 62 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.h | 5 | ||||
| -rw-r--r-- | src/video_core/vertex_loader.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/vertex_loader.h | 4 |
20 files changed, 185 insertions, 181 deletions
diff --git a/src/citra_qt/debugger/graphics/graphics_tracing.cpp b/src/citra_qt/debugger/graphics/graphics_tracing.cpp index 716ed50b8..17f1c5ce2 100644 --- a/src/citra_qt/debugger/graphics/graphics_tracing.cpp +++ b/src/citra_qt/debugger/graphics/graphics_tracing.cpp | |||
| @@ -71,8 +71,8 @@ void GraphicsTracingWidget::StartRecording() { | |||
| 71 | std::array<u32, 4 * 16> default_attributes; | 71 | std::array<u32, 4 * 16> default_attributes; |
| 72 | for (unsigned i = 0; i < 16; ++i) { | 72 | for (unsigned i = 0; i < 16; ++i) { |
| 73 | for (unsigned comp = 0; comp < 3; ++comp) { | 73 | for (unsigned comp = 0; comp < 3; ++comp) { |
| 74 | default_attributes[4 * i + comp] = | 74 | default_attributes[4 * i + comp] = nihstro::to_float24( |
| 75 | nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32()); | 75 | Pica::g_state.input_default_attributes.attr[i][comp].ToFloat32()); |
| 76 | } | 76 | } |
| 77 | } | 77 | } |
| 78 | 78 | ||
diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index f37524190..489ec5f21 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp | |||
| @@ -511,7 +511,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d | |||
| 511 | auto& shader_config = Pica::g_state.regs.vs; | 511 | auto& shader_config = Pica::g_state.regs.vs; |
| 512 | for (auto instr : shader_setup.program_code) | 512 | for (auto instr : shader_setup.program_code) |
| 513 | info.code.push_back({instr}); | 513 | info.code.push_back({instr}); |
| 514 | int num_attributes = Pica::g_state.regs.vertex_attributes.GetNumTotalAttributes(); | 514 | int num_attributes = shader_config.max_input_attribute_index + 1; |
| 515 | 515 | ||
| 516 | for (auto pattern : shader_setup.swizzle_data) | 516 | for (auto pattern : shader_setup.swizzle_data) |
| 517 | info.swizzle_info.push_back({pattern}); | 517 | info.swizzle_info.push_back({pattern}); |
| @@ -522,11 +522,11 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d | |||
| 522 | // Generate debug information | 522 | // Generate debug information |
| 523 | Pica::Shader::InterpreterEngine shader_engine; | 523 | Pica::Shader::InterpreterEngine shader_engine; |
| 524 | shader_engine.SetupBatch(shader_setup, entry_point); | 524 | shader_engine.SetupBatch(shader_setup, entry_point); |
| 525 | debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, num_attributes); | 525 | debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, shader_config); |
| 526 | 526 | ||
| 527 | // Reload widget state | 527 | // Reload widget state |
| 528 | for (int attr = 0; attr < num_attributes; ++attr) { | 528 | for (int attr = 0; attr < num_attributes; ++attr) { |
| 529 | unsigned source_attr = shader_config.input_register_map.GetRegisterForAttribute(attr); | 529 | unsigned source_attr = shader_config.GetRegisterForAttribute(attr); |
| 530 | input_data_mapping[attr]->setText(QString("-> v%1").arg(source_attr)); | 530 | input_data_mapping[attr]->setText(QString("-> v%1").arg(source_attr)); |
| 531 | input_data_container[attr]->setVisible(true); | 531 | input_data_container[attr]->setVisible(true); |
| 532 | } | 532 | } |
diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h index 3292573f3..c249a2ff8 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h | |||
| @@ -82,7 +82,7 @@ private: | |||
| 82 | 82 | ||
| 83 | nihstro::ShaderInfo info; | 83 | nihstro::ShaderInfo info; |
| 84 | Pica::Shader::DebugData<true> debug_data; | 84 | Pica::Shader::DebugData<true> debug_data; |
| 85 | Pica::Shader::InputVertex input_vertex; | 85 | Pica::Shader::AttributeBuffer input_vertex; |
| 86 | 86 | ||
| 87 | friend class GraphicsVertexShaderModel; | 87 | friend class GraphicsVertexShaderModel; |
| 88 | }; | 88 | }; |
diff --git a/src/common/bit_set.h b/src/common/bit_set.h index 3059d0cb0..9c2e6b28c 100644 --- a/src/common/bit_set.h +++ b/src/common/bit_set.h | |||
| @@ -121,22 +121,19 @@ public: | |||
| 121 | class Iterator { | 121 | class Iterator { |
| 122 | public: | 122 | public: |
| 123 | Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {} | 123 | Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {} |
| 124 | Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {} | 124 | Iterator(IntTy val) : m_val(val), m_bit(0) {} |
| 125 | Iterator& operator=(Iterator other) { | 125 | Iterator& operator=(Iterator other) { |
| 126 | new (this) Iterator(other); | 126 | new (this) Iterator(other); |
| 127 | return *this; | 127 | return *this; |
| 128 | } | 128 | } |
| 129 | int operator*() { | 129 | int operator*() { |
| 130 | return m_bit; | 130 | return m_bit + ComputeLsb(); |
| 131 | } | 131 | } |
| 132 | Iterator& operator++() { | 132 | Iterator& operator++() { |
| 133 | if (m_val == 0) { | 133 | int lsb = ComputeLsb(); |
| 134 | m_bit = -1; | 134 | m_val >>= lsb + 1; |
| 135 | } else { | 135 | m_bit += lsb + 1; |
| 136 | int bit = LeastSignificantSetBit(m_val); | 136 | m_has_lsb = false; |
| 137 | m_val &= ~(1 << bit); | ||
| 138 | m_bit = bit; | ||
| 139 | } | ||
| 140 | return *this; | 137 | return *this; |
| 141 | } | 138 | } |
| 142 | Iterator operator++(int _) { | 139 | Iterator operator++(int _) { |
| @@ -145,15 +142,24 @@ public: | |||
| 145 | return other; | 142 | return other; |
| 146 | } | 143 | } |
| 147 | bool operator==(Iterator other) const { | 144 | bool operator==(Iterator other) const { |
| 148 | return m_bit == other.m_bit; | 145 | return m_val == other.m_val; |
| 149 | } | 146 | } |
| 150 | bool operator!=(Iterator other) const { | 147 | bool operator!=(Iterator other) const { |
| 151 | return m_bit != other.m_bit; | 148 | return m_val != other.m_val; |
| 152 | } | 149 | } |
| 153 | 150 | ||
| 154 | private: | 151 | private: |
| 152 | int ComputeLsb() { | ||
| 153 | if (!m_has_lsb) { | ||
| 154 | m_lsb = LeastSignificantSetBit(m_val); | ||
| 155 | m_has_lsb = true; | ||
| 156 | } | ||
| 157 | return m_lsb; | ||
| 158 | } | ||
| 155 | IntTy m_val; | 159 | IntTy m_val; |
| 156 | int m_bit; | 160 | int m_bit; |
| 161 | int m_lsb = -1; | ||
| 162 | bool m_has_lsb = false; | ||
| 157 | }; | 163 | }; |
| 158 | 164 | ||
| 159 | BitSet() : m_val(0) {} | 165 | BitSet() : m_val(0) {} |
| @@ -221,11 +227,10 @@ public: | |||
| 221 | } | 227 | } |
| 222 | 228 | ||
| 223 | Iterator begin() const { | 229 | Iterator begin() const { |
| 224 | Iterator it(m_val, 0); | 230 | return Iterator(m_val); |
| 225 | return ++it; | ||
| 226 | } | 231 | } |
| 227 | Iterator end() const { | 232 | Iterator end() const { |
| 228 | return Iterator(m_val, -1); | 233 | return Iterator(0); |
| 229 | } | 234 | } |
| 230 | 235 | ||
| 231 | IntTy m_val; | 236 | IntTy m_val; |
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 05b5cea73..0774ffc53 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp | |||
| @@ -18,6 +18,8 @@ | |||
| 18 | #include "video_core/rasterizer.h" | 18 | #include "video_core/rasterizer.h" |
| 19 | #include "video_core/shader/shader.h" | 19 | #include "video_core/shader/shader.h" |
| 20 | 20 | ||
| 21 | using Pica::Rasterizer::Vertex; | ||
| 22 | |||
| 21 | namespace Pica { | 23 | namespace Pica { |
| 22 | 24 | ||
| 23 | namespace Clipper { | 25 | namespace Clipper { |
| @@ -29,20 +31,20 @@ public: | |||
| 29 | float24::FromFloat32(0), float24::FromFloat32(0))) | 31 | float24::FromFloat32(0), float24::FromFloat32(0))) |
| 30 | : coeffs(coeffs), bias(bias) {} | 32 | : coeffs(coeffs), bias(bias) {} |
| 31 | 33 | ||
| 32 | bool IsInside(const OutputVertex& vertex) const { | 34 | bool IsInside(const Vertex& vertex) const { |
| 33 | return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); | 35 | return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); |
| 34 | } | 36 | } |
| 35 | 37 | ||
| 36 | bool IsOutSide(const OutputVertex& vertex) const { | 38 | bool IsOutSide(const Vertex& vertex) const { |
| 37 | return !IsInside(vertex); | 39 | return !IsInside(vertex); |
| 38 | } | 40 | } |
| 39 | 41 | ||
| 40 | OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const { | 42 | Vertex GetIntersection(const Vertex& v0, const Vertex& v1) const { |
| 41 | float24 dp = Math::Dot(v0.pos + bias, coeffs); | 43 | float24 dp = Math::Dot(v0.pos + bias, coeffs); |
| 42 | float24 dp_prev = Math::Dot(v1.pos + bias, coeffs); | 44 | float24 dp_prev = Math::Dot(v1.pos + bias, coeffs); |
| 43 | float24 factor = dp_prev / (dp_prev - dp); | 45 | float24 factor = dp_prev / (dp_prev - dp); |
| 44 | 46 | ||
| 45 | return OutputVertex::Lerp(factor, v0, v1); | 47 | return Vertex::Lerp(factor, v0, v1); |
| 46 | } | 48 | } |
| 47 | 49 | ||
| 48 | private: | 50 | private: |
| @@ -51,7 +53,7 @@ private: | |||
| 51 | Math::Vec4<float24> bias; | 53 | Math::Vec4<float24> bias; |
| 52 | }; | 54 | }; |
| 53 | 55 | ||
| 54 | static void InitScreenCoordinates(OutputVertex& vtx) { | 56 | static void InitScreenCoordinates(Vertex& vtx) { |
| 55 | struct { | 57 | struct { |
| 56 | float24 halfsize_x; | 58 | float24 halfsize_x; |
| 57 | float24 offset_x; | 59 | float24 offset_x; |
| @@ -91,8 +93,8 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu | |||
| 91 | // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a | 93 | // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a |
| 92 | // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. | 94 | // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. |
| 93 | static const size_t MAX_VERTICES = 9; | 95 | static const size_t MAX_VERTICES = 9; |
| 94 | static_vector<OutputVertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; | 96 | static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; |
| 95 | static_vector<OutputVertex, MAX_VERTICES> buffer_b; | 97 | static_vector<Vertex, MAX_VERTICES> buffer_b; |
| 96 | auto* output_list = &buffer_a; | 98 | auto* output_list = &buffer_a; |
| 97 | auto* input_list = &buffer_b; | 99 | auto* input_list = &buffer_b; |
| 98 | 100 | ||
| @@ -123,7 +125,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu | |||
| 123 | std::swap(input_list, output_list); | 125 | std::swap(input_list, output_list); |
| 124 | output_list->clear(); | 126 | output_list->clear(); |
| 125 | 127 | ||
| 126 | const OutputVertex* reference_vertex = &input_list->back(); | 128 | const Vertex* reference_vertex = &input_list->back(); |
| 127 | 129 | ||
| 128 | for (const auto& vertex : *input_list) { | 130 | for (const auto& vertex : *input_list) { |
| 129 | // NOTE: This algorithm changes vertex order in some cases! | 131 | // NOTE: This algorithm changes vertex order in some cases! |
| @@ -148,9 +150,9 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu | |||
| 148 | InitScreenCoordinates((*output_list)[1]); | 150 | InitScreenCoordinates((*output_list)[1]); |
| 149 | 151 | ||
| 150 | for (size_t i = 0; i < output_list->size() - 2; i++) { | 152 | for (size_t i = 0; i < output_list->size() - 2; i++) { |
| 151 | OutputVertex& vtx0 = (*output_list)[0]; | 153 | Vertex& vtx0 = (*output_list)[0]; |
| 152 | OutputVertex& vtx1 = (*output_list)[i + 1]; | 154 | Vertex& vtx1 = (*output_list)[i + 1]; |
| 153 | OutputVertex& vtx2 = (*output_list)[i + 2]; | 155 | Vertex& vtx2 = (*output_list)[i + 2]; |
| 154 | 156 | ||
| 155 | InitScreenCoordinates(vtx2); | 157 | InitScreenCoordinates(vtx2); |
| 156 | 158 | ||
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index eb79974a8..4955ff9f9 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -125,20 +125,21 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 125 | 125 | ||
| 126 | // TODO: Verify that this actually modifies the register! | 126 | // TODO: Verify that this actually modifies the register! |
| 127 | if (setup.index < 15) { | 127 | if (setup.index < 15) { |
| 128 | g_state.vs_default_attributes[setup.index] = attribute; | 128 | g_state.input_default_attributes.attr[setup.index] = attribute; |
| 129 | setup.index++; | 129 | setup.index++; |
| 130 | } else { | 130 | } else { |
| 131 | // Put each attribute into an immediate input buffer. | 131 | // Put each attribute into an immediate input buffer. When all specified immediate |
| 132 | // When all specified immediate attributes are present, the Vertex Shader is invoked | 132 | // attributes are present, the Vertex Shader is invoked and everything is sent to |
| 133 | // and everything is | 133 | // the primitive assembler. |
| 134 | // sent to the primitive assembler. | ||
| 135 | 134 | ||
| 136 | auto& immediate_input = g_state.immediate.input_vertex; | 135 | auto& immediate_input = g_state.immediate.input_vertex; |
| 137 | auto& immediate_attribute_id = g_state.immediate.current_attribute; | 136 | auto& immediate_attribute_id = g_state.immediate.current_attribute; |
| 138 | 137 | ||
| 139 | immediate_input.attr[immediate_attribute_id++] = attribute; | 138 | immediate_input.attr[immediate_attribute_id] = attribute; |
| 140 | 139 | ||
| 141 | if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) { | 140 | if (immediate_attribute_id < regs.max_input_attrib_index) { |
| 141 | immediate_attribute_id += 1; | ||
| 142 | } else { | ||
| 142 | MICROPROFILE_SCOPE(GPU_Drawing); | 143 | MICROPROFILE_SCOPE(GPU_Drawing); |
| 143 | immediate_attribute_id = 0; | 144 | immediate_attribute_id = 0; |
| 144 | 145 | ||
| @@ -150,10 +151,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 150 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, | 151 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, |
| 151 | static_cast<void*>(&immediate_input)); | 152 | static_cast<void*>(&immediate_input)); |
| 152 | Shader::UnitState shader_unit; | 153 | Shader::UnitState shader_unit; |
| 153 | shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); | 154 | Shader::AttributeBuffer output{}; |
| 155 | |||
| 156 | shader_unit.LoadInput(regs.vs, immediate_input); | ||
| 154 | shader_engine->Run(g_state.vs, shader_unit); | 157 | shader_engine->Run(g_state.vs, shader_unit); |
| 155 | auto output_vertex = Shader::OutputVertex::FromRegisters( | 158 | shader_unit.WriteOutput(regs.vs, output); |
| 156 | shader_unit.registers.output, regs, regs.vs.output_mask); | ||
| 157 | 159 | ||
| 158 | // Send to renderer | 160 | // Send to renderer |
| 159 | using Pica::Shader::OutputVertex; | 161 | using Pica::Shader::OutputVertex; |
| @@ -162,7 +164,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 162 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | 164 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); |
| 163 | }; | 165 | }; |
| 164 | 166 | ||
| 165 | g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); | 167 | g_state.primitive_assembler.SubmitVertex( |
| 168 | Shader::OutputVertex::FromAttributeBuffer(regs, output), AddTriangle); | ||
| 166 | } | 169 | } |
| 167 | } | 170 | } |
| 168 | } | 171 | } |
| @@ -280,19 +283,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 280 | 283 | ||
| 281 | if (!vertex_cache_hit) { | 284 | if (!vertex_cache_hit) { |
| 282 | // Initialize data for the current vertex | 285 | // Initialize data for the current vertex |
| 283 | Shader::InputVertex input; | 286 | Shader::AttributeBuffer input, output{}; |
| 284 | loader.LoadVertex(base_address, index, vertex, input, memory_accesses); | 287 | loader.LoadVertex(base_address, index, vertex, input, memory_accesses); |
| 285 | 288 | ||
| 286 | // Send to vertex shader | 289 | // Send to vertex shader |
| 287 | if (g_debug_context) | 290 | if (g_debug_context) |
| 288 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, | 291 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, |
| 289 | (void*)&input); | 292 | (void*)&input); |
| 290 | shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes()); | 293 | shader_unit.LoadInput(regs.vs, input); |
| 291 | shader_engine->Run(g_state.vs, shader_unit); | 294 | shader_engine->Run(g_state.vs, shader_unit); |
| 295 | shader_unit.WriteOutput(regs.vs, output); | ||
| 292 | 296 | ||
| 293 | // Retrieve vertex from register data | 297 | // Retrieve vertex from register data |
| 294 | output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output, | 298 | output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs, output); |
| 295 | regs, regs.vs.output_mask); | ||
| 296 | 299 | ||
| 297 | if (is_indexed) { | 300 | if (is_indexed) { |
| 298 | vertex_cache[vertex_cache_pos] = output_vertex; | 301 | vertex_cache[vertex_cache_pos] = output_vertex; |
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 4ab4f1f40..731540b99 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -99,7 +99,8 @@ struct Regs { | |||
| 99 | TEXCOORD1_U = 14, | 99 | TEXCOORD1_U = 14, |
| 100 | TEXCOORD1_V = 15, | 100 | TEXCOORD1_V = 15, |
| 101 | 101 | ||
| 102 | // TODO: Not verified | 102 | TEXCOORD0_W = 16, |
| 103 | |||
| 103 | VIEW_X = 18, | 104 | VIEW_X = 18, |
| 104 | VIEW_Y = 19, | 105 | VIEW_Y = 19, |
| 105 | VIEW_Z = 20, | 106 | VIEW_Z = 20, |
| @@ -871,7 +872,7 @@ struct Regs { | |||
| 871 | LightSrc light[8]; | 872 | LightSrc light[8]; |
| 872 | LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) | 873 | LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) |
| 873 | INSERT_PADDING_WORDS(0x1); | 874 | INSERT_PADDING_WORDS(0x1); |
| 874 | BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 | 875 | BitField<0, 3, u32> max_light_index; // Number of enabled lights - 1 |
| 875 | 876 | ||
| 876 | union { | 877 | union { |
| 877 | BitField<2, 2, LightingFresnelSelector> fresnel_selector; | 878 | BitField<2, 2, LightingFresnelSelector> fresnel_selector; |
| @@ -1048,7 +1049,7 @@ struct Regs { | |||
| 1048 | BitField<48, 12, u64> attribute_mask; | 1049 | BitField<48, 12, u64> attribute_mask; |
| 1049 | 1050 | ||
| 1050 | // number of total attributes minus 1 | 1051 | // number of total attributes minus 1 |
| 1051 | BitField<60, 4, u64> num_extra_attributes; | 1052 | BitField<60, 4, u64> max_attribute_index; |
| 1052 | }; | 1053 | }; |
| 1053 | 1054 | ||
| 1054 | inline VertexAttributeFormat GetFormat(int n) const { | 1055 | inline VertexAttributeFormat GetFormat(int n) const { |
| @@ -1079,7 +1080,7 @@ struct Regs { | |||
| 1079 | } | 1080 | } |
| 1080 | 1081 | ||
| 1081 | inline int GetNumTotalAttributes() const { | 1082 | inline int GetNumTotalAttributes() const { |
| 1082 | return (int)num_extra_attributes + 1; | 1083 | return (int)max_attribute_index + 1; |
| 1083 | } | 1084 | } |
| 1084 | 1085 | ||
| 1085 | // Attribute loaders map the source vertex data to input attributes | 1086 | // Attribute loaders map the source vertex data to input attributes |
| @@ -1179,7 +1180,12 @@ struct Regs { | |||
| 1179 | } | 1180 | } |
| 1180 | } command_buffer; | 1181 | } command_buffer; |
| 1181 | 1182 | ||
| 1182 | INSERT_PADDING_WORDS(0x07); | 1183 | INSERT_PADDING_WORDS(4); |
| 1184 | |||
| 1185 | /// Number of input attributes to the vertex shader minus 1 | ||
| 1186 | BitField<0, 4, u32> max_input_attrib_index; | ||
| 1187 | |||
| 1188 | INSERT_PADDING_WORDS(2); | ||
| 1183 | 1189 | ||
| 1184 | enum class GPUMode : u32 { | 1190 | enum class GPUMode : u32 { |
| 1185 | Drawing = 0, | 1191 | Drawing = 0, |
| @@ -1217,42 +1223,21 @@ struct Regs { | |||
| 1217 | 1223 | ||
| 1218 | union { | 1224 | union { |
| 1219 | // Number of input attributes to shader unit - 1 | 1225 | // Number of input attributes to shader unit - 1 |
| 1220 | BitField<0, 4, u32> num_input_attributes; | 1226 | BitField<0, 4, u32> max_input_attribute_index; |
| 1221 | }; | 1227 | }; |
| 1222 | 1228 | ||
| 1223 | // Offset to shader program entry point (in words) | 1229 | // Offset to shader program entry point (in words) |
| 1224 | BitField<0, 16, u32> main_offset; | 1230 | BitField<0, 16, u32> main_offset; |
| 1225 | 1231 | ||
| 1226 | union { | 1232 | /// Maps input attributes to registers. 4-bits per attribute, specifying a register index |
| 1227 | BitField<0, 4, u64> attribute0_register; | 1233 | u32 input_attribute_to_register_map_low; |
| 1228 | BitField<4, 4, u64> attribute1_register; | 1234 | u32 input_attribute_to_register_map_high; |
| 1229 | BitField<8, 4, u64> attribute2_register; | 1235 | |
| 1230 | BitField<12, 4, u64> attribute3_register; | 1236 | unsigned int GetRegisterForAttribute(unsigned int attribute_index) const { |
| 1231 | BitField<16, 4, u64> attribute4_register; | 1237 | u64 map = ((u64)input_attribute_to_register_map_high << 32) | |
| 1232 | BitField<20, 4, u64> attribute5_register; | 1238 | (u64)input_attribute_to_register_map_low; |
| 1233 | BitField<24, 4, u64> attribute6_register; | 1239 | return (map >> (attribute_index * 4)) & 0b1111; |
| 1234 | BitField<28, 4, u64> attribute7_register; | 1240 | } |
| 1235 | BitField<32, 4, u64> attribute8_register; | ||
| 1236 | BitField<36, 4, u64> attribute9_register; | ||
| 1237 | BitField<40, 4, u64> attribute10_register; | ||
| 1238 | BitField<44, 4, u64> attribute11_register; | ||
| 1239 | BitField<48, 4, u64> attribute12_register; | ||
| 1240 | BitField<52, 4, u64> attribute13_register; | ||
| 1241 | BitField<56, 4, u64> attribute14_register; | ||
| 1242 | BitField<60, 4, u64> attribute15_register; | ||
| 1243 | |||
| 1244 | int GetRegisterForAttribute(int attribute_index) const { | ||
| 1245 | u64 fields[] = { | ||
| 1246 | attribute0_register, attribute1_register, attribute2_register, | ||
| 1247 | attribute3_register, attribute4_register, attribute5_register, | ||
| 1248 | attribute6_register, attribute7_register, attribute8_register, | ||
| 1249 | attribute9_register, attribute10_register, attribute11_register, | ||
| 1250 | attribute12_register, attribute13_register, attribute14_register, | ||
| 1251 | attribute15_register, | ||
| 1252 | }; | ||
| 1253 | return (int)fields[attribute_index]; | ||
| 1254 | } | ||
| 1255 | } input_register_map; | ||
| 1256 | 1241 | ||
| 1257 | BitField<0, 16, u32> output_mask; | 1242 | BitField<0, 16, u32> output_mask; |
| 1258 | 1243 | ||
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index e4f2e6d5d..785d05650 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h | |||
| @@ -23,7 +23,7 @@ struct State { | |||
| 23 | Shader::ShaderSetup vs; | 23 | Shader::ShaderSetup vs; |
| 24 | Shader::ShaderSetup gs; | 24 | Shader::ShaderSetup gs; |
| 25 | 25 | ||
| 26 | std::array<Math::Vec4<float24>, 16> vs_default_attributes; | 26 | Shader::AttributeBuffer input_default_attributes; |
| 27 | 27 | ||
| 28 | struct { | 28 | struct { |
| 29 | union LutEntry { | 29 | union LutEntry { |
| @@ -66,7 +66,7 @@ struct State { | |||
| 66 | /// Struct used to describe immediate mode rendering state | 66 | /// Struct used to describe immediate mode rendering state |
| 67 | struct ImmediateModeState { | 67 | struct ImmediateModeState { |
| 68 | // Used to buffer partial vertices for immediate-mode rendering. | 68 | // Used to buffer partial vertices for immediate-mode rendering. |
| 69 | Shader::InputVertex input_vertex; | 69 | Shader::AttributeBuffer input_vertex; |
| 70 | // Index of the next attribute to be loaded into `input_vertex`. | 70 | // Index of the next attribute to be loaded into `input_vertex`. |
| 71 | u32 current_attribute = 0; | 71 | u32 current_attribute = 0; |
| 72 | } immediate; | 72 | } immediate; |
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index be7377290..e71ff5719 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp | |||
| @@ -14,7 +14,7 @@ PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topolo | |||
| 14 | : topology(topology), buffer_index(0) {} | 14 | : topology(topology), buffer_index(0) {} |
| 15 | 15 | ||
| 16 | template <typename VertexType> | 16 | template <typename VertexType> |
| 17 | void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, | 17 | void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, |
| 18 | TriangleHandler triangle_handler) { | 18 | TriangleHandler triangle_handler) { |
| 19 | switch (topology) { | 19 | switch (topology) { |
| 20 | // TODO: Figure out what's different with TriangleTopology::Shader. | 20 | // TODO: Figure out what's different with TriangleTopology::Shader. |
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index 0384d5984..24da47382 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h | |||
| @@ -15,7 +15,8 @@ namespace Pica { | |||
| 15 | */ | 15 | */ |
| 16 | template <typename VertexType> | 16 | template <typename VertexType> |
| 17 | struct PrimitiveAssembler { | 17 | struct PrimitiveAssembler { |
| 18 | using TriangleHandler = std::function<void(VertexType& v0, VertexType& v1, VertexType& v2)>; | 18 | using TriangleHandler = |
| 19 | std::function<void(const VertexType& v0, const VertexType& v1, const VertexType& v2)>; | ||
| 19 | 20 | ||
| 20 | PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List); | 21 | PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List); |
| 21 | 22 | ||
| @@ -25,7 +26,7 @@ struct PrimitiveAssembler { | |||
| 25 | * NOTE: We could specify the triangle handler in the constructor, but this way we can | 26 | * NOTE: We could specify the triangle handler in the constructor, but this way we can |
| 26 | * keep event and handler code next to each other. | 27 | * keep event and handler code next to each other. |
| 27 | */ | 28 | */ |
| 28 | void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler); | 29 | void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); |
| 29 | 30 | ||
| 30 | /** | 31 | /** |
| 31 | * Resets the internal state of the PrimitiveAssembler. | 32 | * Resets the internal state of the PrimitiveAssembler. |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index c034c12d3..287d732b5 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -308,8 +308,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24 | |||
| 308 | * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing | 308 | * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing |
| 309 | * culling via recursion. | 309 | * culling via recursion. |
| 310 | */ | 310 | */ |
| 311 | static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, | 311 | static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Vertex& v2, |
| 312 | const Shader::OutputVertex& v2, bool reversed = false) { | 312 | bool reversed = false) { |
| 313 | const auto& regs = g_state.regs; | 313 | const auto& regs = g_state.regs; |
| 314 | MICROPROFILE_SCOPE(GPU_Rasterization); | 314 | MICROPROFILE_SCOPE(GPU_Rasterization); |
| 315 | 315 | ||
| @@ -1277,8 +1277,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader | |||
| 1277 | } | 1277 | } |
| 1278 | } | 1278 | } |
| 1279 | 1279 | ||
| 1280 | void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, | 1280 | void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2) { |
| 1281 | const Shader::OutputVertex& v2) { | ||
| 1282 | ProcessTriangleInternal(v0, v1, v2); | 1281 | ProcessTriangleInternal(v0, v1, v2); |
| 1283 | } | 1282 | } |
| 1284 | 1283 | ||
diff --git a/src/video_core/rasterizer.h b/src/video_core/rasterizer.h index 6cbda3067..3a72ac343 100644 --- a/src/video_core/rasterizer.h +++ b/src/video_core/rasterizer.h | |||
| @@ -4,16 +4,44 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | namespace Pica { | 7 | #include "video_core/shader/shader.h" |
| 8 | 8 | ||
| 9 | namespace Shader { | 9 | namespace Pica { |
| 10 | struct OutputVertex; | ||
| 11 | } | ||
| 12 | 10 | ||
| 13 | namespace Rasterizer { | 11 | namespace Rasterizer { |
| 14 | 12 | ||
| 15 | void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, | 13 | struct Vertex : Shader::OutputVertex { |
| 16 | const Shader::OutputVertex& v2); | 14 | Vertex(const OutputVertex& v) : OutputVertex(v) {} |
| 15 | |||
| 16 | // Attributes used to store intermediate results | ||
| 17 | // position after perspective divide | ||
| 18 | Math::Vec3<float24> screenpos; | ||
| 19 | |||
| 20 | // Linear interpolation | ||
| 21 | // factor: 0=this, 1=vtx | ||
| 22 | void Lerp(float24 factor, const Vertex& vtx) { | ||
| 23 | pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); | ||
| 24 | |||
| 25 | // TODO: Should perform perspective correct interpolation here... | ||
| 26 | tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); | ||
| 27 | tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); | ||
| 28 | tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); | ||
| 29 | |||
| 30 | screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); | ||
| 31 | |||
| 32 | color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); | ||
| 33 | } | ||
| 34 | |||
| 35 | // Linear interpolation | ||
| 36 | // factor: 0=v0, 1=v1 | ||
| 37 | static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) { | ||
| 38 | Vertex ret = v0; | ||
| 39 | ret.Lerp(factor, v1); | ||
| 40 | return ret; | ||
| 41 | } | ||
| 42 | }; | ||
| 43 | |||
| 44 | void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2); | ||
| 17 | 45 | ||
| 18 | } // namespace Rasterizer | 46 | } // namespace Rasterizer |
| 19 | 47 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f3674e965..071e4ace0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -467,7 +467,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||
| 467 | 467 | ||
| 468 | // Fragment lighting switches | 468 | // Fragment lighting switches |
| 469 | case PICA_REG_INDEX(lighting.disable): | 469 | case PICA_REG_INDEX(lighting.disable): |
| 470 | case PICA_REG_INDEX(lighting.num_lights): | 470 | case PICA_REG_INDEX(lighting.max_light_index): |
| 471 | case PICA_REG_INDEX(lighting.config0): | 471 | case PICA_REG_INDEX(lighting.config0): |
| 472 | case PICA_REG_INDEX(lighting.config1): | 472 | case PICA_REG_INDEX(lighting.config1): |
| 473 | case PICA_REG_INDEX(lighting.abs_lut_input): | 473 | case PICA_REG_INDEX(lighting.abs_lut_input): |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index cc3e4bed5..a1aa07074 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -84,7 +84,7 @@ union PicaShaderConfig { | |||
| 84 | // Fragment lighting | 84 | // Fragment lighting |
| 85 | 85 | ||
| 86 | state.lighting.enable = !regs.lighting.disable; | 86 | state.lighting.enable = !regs.lighting.disable; |
| 87 | state.lighting.src_num = regs.lighting.num_lights + 1; | 87 | state.lighting.src_num = regs.lighting.max_light_index + 1; |
| 88 | 88 | ||
| 89 | for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { | 89 | for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { |
| 90 | unsigned num = regs.lighting.light_enable.GetNum(light_index); | 90 | unsigned num = regs.lighting.light_enable.GetNum(light_index); |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 2da50bd62..f5f7ea61d 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <cmath> | 5 | #include <cmath> |
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include "common/bit_set.h" | ||
| 7 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 8 | #include "common/microprofile.h" | 9 | #include "common/microprofile.h" |
| 9 | #include "video_core/pica.h" | 10 | #include "video_core/pica.h" |
| @@ -19,38 +20,32 @@ namespace Pica { | |||
| 19 | 20 | ||
| 20 | namespace Shader { | 21 | namespace Shader { |
| 21 | 22 | ||
| 22 | OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, | 23 | OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer& input) { |
| 23 | u32 output_mask) { | ||
| 24 | // Setup output data | 24 | // Setup output data |
| 25 | OutputVertex ret; | 25 | union { |
| 26 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | 26 | OutputVertex ret{}; |
| 27 | // figure out what those circumstances are and enable the remaining outputs then. | 27 | std::array<float24, 24> vertex_slots; |
| 28 | unsigned index = 0; | 28 | }; |
| 29 | for (unsigned i = 0; i < 7; ++i) { | 29 | static_assert(sizeof(vertex_slots) == sizeof(ret), "Struct and array have different sizes."); |
| 30 | 30 | ||
| 31 | if (index >= regs.vs_output_total) | 31 | unsigned int num_attributes = regs.vs_output_total; |
| 32 | break; | 32 | ASSERT(num_attributes <= 7); |
| 33 | for (unsigned int i = 0; i < num_attributes; ++i) { | ||
| 34 | const auto& output_register_map = regs.vs_output_attributes[i]; | ||
| 33 | 35 | ||
| 34 | if ((output_mask & (1 << i)) == 0) | 36 | Regs::VSOutputAttributes::Semantic semantics[4] = { |
| 35 | continue; | 37 | output_register_map.map_x, output_register_map.map_y, output_register_map.map_z, |
| 36 | 38 | output_register_map.map_w}; | |
| 37 | const auto& output_register_map = regs.vs_output_attributes[index]; | ||
| 38 | |||
| 39 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, | ||
| 40 | output_register_map.map_z, output_register_map.map_w}; | ||
| 41 | 39 | ||
| 42 | for (unsigned comp = 0; comp < 4; ++comp) { | 40 | for (unsigned comp = 0; comp < 4; ++comp) { |
| 43 | float24* out = ((float24*)&ret) + semantics[comp]; | 41 | Regs::VSOutputAttributes::Semantic semantic = semantics[comp]; |
| 44 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | 42 | float24* out = &vertex_slots[semantic]; |
| 45 | *out = output_regs[i][comp]; | 43 | if (semantic < vertex_slots.size()) { |
| 46 | } else { | 44 | *out = input.attr[i][comp]; |
| 47 | // Zero output so that attributes which aren't output won't have denormals in them, | 45 | } else if (semantic != Regs::VSOutputAttributes::INVALID) { |
| 48 | // which would slow us down later. | 46 | LOG_ERROR(HW_GPU, "Invalid/unknown semantic id: %u", (unsigned int)semantic); |
| 49 | memset(out, 0, sizeof(*out)); | ||
| 50 | } | 47 | } |
| 51 | } | 48 | } |
| 52 | |||
| 53 | index++; | ||
| 54 | } | 49 | } |
| 55 | 50 | ||
| 56 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing | 51 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing |
| @@ -71,12 +66,20 @@ OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], co | |||
| 71 | return ret; | 66 | return ret; |
| 72 | } | 67 | } |
| 73 | 68 | ||
| 74 | void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { | 69 | void UnitState::LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input) { |
| 75 | // Setup input register table | 70 | const unsigned max_attribute = config.max_input_attribute_index; |
| 76 | const auto& attribute_register_map = g_state.regs.vs.input_register_map; | ||
| 77 | 71 | ||
| 78 | for (int i = 0; i < num_attributes; i++) | 72 | for (unsigned attr = 0; attr <= max_attribute; ++attr) { |
| 79 | registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | 73 | unsigned reg = config.GetRegisterForAttribute(attr); |
| 74 | registers.input[reg] = input.attr[attr]; | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | void UnitState::WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output) { | ||
| 79 | unsigned int output_i = 0; | ||
| 80 | for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) { | ||
| 81 | output.attr[output_i++] = registers.output[reg]; | ||
| 82 | } | ||
| 80 | } | 83 | } |
| 81 | 84 | ||
| 82 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | 85 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 44d9f76c3..b188d3edf 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -23,14 +23,11 @@ namespace Pica { | |||
| 23 | 23 | ||
| 24 | namespace Shader { | 24 | namespace Shader { |
| 25 | 25 | ||
| 26 | struct InputVertex { | 26 | struct AttributeBuffer { |
| 27 | alignas(16) Math::Vec4<float24> attr[16]; | 27 | alignas(16) Math::Vec4<float24> attr[16]; |
| 28 | }; | 28 | }; |
| 29 | 29 | ||
| 30 | struct OutputVertex { | 30 | struct OutputVertex { |
| 31 | OutputVertex() = default; | ||
| 32 | |||
| 33 | // VS output attributes | ||
| 34 | Math::Vec4<float24> pos; | 31 | Math::Vec4<float24> pos; |
| 35 | Math::Vec4<float24> quat; | 32 | Math::Vec4<float24> quat; |
| 36 | Math::Vec4<float24> color; | 33 | Math::Vec4<float24> color; |
| @@ -42,43 +39,22 @@ struct OutputVertex { | |||
| 42 | INSERT_PADDING_WORDS(1); | 39 | INSERT_PADDING_WORDS(1); |
| 43 | Math::Vec2<float24> tc2; | 40 | Math::Vec2<float24> tc2; |
| 44 | 41 | ||
| 45 | // Padding for optimal alignment | 42 | static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output); |
| 46 | INSERT_PADDING_WORDS(4); | ||
| 47 | |||
| 48 | // Attributes used to store intermediate results | ||
| 49 | |||
| 50 | // position after perspective divide | ||
| 51 | Math::Vec3<float24> screenpos; | ||
| 52 | INSERT_PADDING_WORDS(1); | ||
| 53 | |||
| 54 | // Linear interpolation | ||
| 55 | // factor: 0=this, 1=vtx | ||
| 56 | void Lerp(float24 factor, const OutputVertex& vtx) { | ||
| 57 | pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); | ||
| 58 | |||
| 59 | // TODO: Should perform perspective correct interpolation here... | ||
| 60 | tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); | ||
| 61 | tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); | ||
| 62 | tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); | ||
| 63 | |||
| 64 | screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); | ||
| 65 | |||
| 66 | color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); | ||
| 67 | } | ||
| 68 | |||
| 69 | // Linear interpolation | ||
| 70 | // factor: 0=v0, 1=v1 | ||
| 71 | static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { | ||
| 72 | OutputVertex ret = v0; | ||
| 73 | ret.Lerp(factor, v1); | ||
| 74 | return ret; | ||
| 75 | } | ||
| 76 | |||
| 77 | static OutputVertex FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, | ||
| 78 | u32 output_mask); | ||
| 79 | }; | 43 | }; |
| 44 | #define ASSERT_POS(var, pos) \ | ||
| 45 | static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ | ||
| 46 | "offset.") | ||
| 47 | ASSERT_POS(pos, Regs::VSOutputAttributes::POSITION_X); | ||
| 48 | ASSERT_POS(quat, Regs::VSOutputAttributes::QUATERNION_X); | ||
| 49 | ASSERT_POS(color, Regs::VSOutputAttributes::COLOR_R); | ||
| 50 | ASSERT_POS(tc0, Regs::VSOutputAttributes::TEXCOORD0_U); | ||
| 51 | ASSERT_POS(tc1, Regs::VSOutputAttributes::TEXCOORD1_U); | ||
| 52 | ASSERT_POS(tc0_w, Regs::VSOutputAttributes::TEXCOORD0_W); | ||
| 53 | ASSERT_POS(view, Regs::VSOutputAttributes::VIEW_X); | ||
| 54 | ASSERT_POS(tc2, Regs::VSOutputAttributes::TEXCOORD2_U); | ||
| 55 | #undef ASSERT_POS | ||
| 80 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 56 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 81 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 57 | static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); |
| 82 | 58 | ||
| 83 | /** | 59 | /** |
| 84 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 60 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| @@ -137,10 +113,12 @@ struct UnitState { | |||
| 137 | /** | 113 | /** |
| 138 | * Loads the unit state with an input vertex. | 114 | * Loads the unit state with an input vertex. |
| 139 | * | 115 | * |
| 140 | * @param input Input vertex into the shader | 116 | * @param config Shader configuration registers corresponding to the unit. |
| 141 | * @param num_attributes The number of vertex shader attributes to load | 117 | * @param input Attribute buffer to load into the input registers. |
| 142 | */ | 118 | */ |
| 143 | void LoadInputVertex(const InputVertex& input, int num_attributes); | 119 | void LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input); |
| 120 | |||
| 121 | void WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output); | ||
| 144 | }; | 122 | }; |
| 145 | 123 | ||
| 146 | struct ShaderSetup { | 124 | struct ShaderSetup { |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index c0c89b857..81522b8f5 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -668,14 +668,14 @@ void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const { | |||
| 668 | } | 668 | } |
| 669 | 669 | ||
| 670 | DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, | 670 | DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, |
| 671 | const InputVertex& input, | 671 | const AttributeBuffer& input, |
| 672 | int num_attributes) const { | 672 | const Regs::ShaderConfig& config) const { |
| 673 | UnitState state; | 673 | UnitState state; |
| 674 | DebugData<true> debug_data; | 674 | DebugData<true> debug_data; |
| 675 | 675 | ||
| 676 | // Setup input register table | 676 | // Setup input register table |
| 677 | boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); | 677 | boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); |
| 678 | state.LoadInputVertex(input, num_attributes); | 678 | state.LoadInput(config, input); |
| 679 | RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); | 679 | RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); |
| 680 | return debug_data; | 680 | return debug_data; |
| 681 | } | 681 | } |
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index d6c0e2d8c..d7a61e122 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h | |||
| @@ -19,12 +19,11 @@ public: | |||
| 19 | /** | 19 | /** |
| 20 | * Produce debug information based on the given shader and input vertex | 20 | * Produce debug information based on the given shader and input vertex |
| 21 | * @param input Input vertex into the shader | 21 | * @param input Input vertex into the shader |
| 22 | * @param num_attributes The number of vertex shader attributes | ||
| 23 | * @param config Configuration object for the shader pipeline | 22 | * @param config Configuration object for the shader pipeline |
| 24 | * @return Debug information for this shader with regards to the given vertex | 23 | * @return Debug information for this shader with regards to the given vertex |
| 25 | */ | 24 | */ |
| 26 | DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input, | 25 | DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input, |
| 27 | int num_attributes) const; | 26 | const Regs::ShaderConfig& config) const; |
| 28 | }; | 27 | }; |
| 29 | 28 | ||
| 30 | } // namespace | 29 | } // namespace |
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index 2b8ef7018..bf83b61ca 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp | |||
| @@ -70,7 +70,8 @@ void VertexLoader::Setup(const Pica::Regs& regs) { | |||
| 70 | is_setup = true; | 70 | is_setup = true; |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, | 73 | void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, |
| 74 | Shader::AttributeBuffer& input, | ||
| 74 | DebugUtils::MemoryAccessTracker& memory_accesses) { | 75 | DebugUtils::MemoryAccessTracker& memory_accesses) { |
| 75 | ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); | 76 | ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); |
| 76 | 77 | ||
| @@ -142,7 +143,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I | |||
| 142 | input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | 143 | input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); |
| 143 | } else if (vertex_attribute_is_default[i]) { | 144 | } else if (vertex_attribute_is_default[i]) { |
| 144 | // Load the default attribute if we're configured to do so | 145 | // Load the default attribute if we're configured to do so |
| 145 | input.attr[i] = g_state.vs_default_attributes[i]; | 146 | input.attr[i] = g_state.input_default_attributes.attr[i]; |
| 146 | LOG_TRACE(HW_GPU, | 147 | LOG_TRACE(HW_GPU, |
| 147 | "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i, | 148 | "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i, |
| 148 | vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | 149 | vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), |
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h index 9f2098bb2..51f3d45b4 100644 --- a/src/video_core/vertex_loader.h +++ b/src/video_core/vertex_loader.h | |||
| @@ -11,7 +11,7 @@ class MemoryAccessTracker; | |||
| 11 | } | 11 | } |
| 12 | 12 | ||
| 13 | namespace Shader { | 13 | namespace Shader { |
| 14 | struct InputVertex; | 14 | struct AttributeBuffer; |
| 15 | } | 15 | } |
| 16 | 16 | ||
| 17 | class VertexLoader { | 17 | class VertexLoader { |
| @@ -22,7 +22,7 @@ public: | |||
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | void Setup(const Pica::Regs& regs); | 24 | void Setup(const Pica::Regs& regs); |
| 25 | void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, | 25 | void LoadVertex(u32 base_address, int index, int vertex, Shader::AttributeBuffer& input, |
| 26 | DebugUtils::MemoryAccessTracker& memory_accesses); | 26 | DebugUtils::MemoryAccessTracker& memory_accesses); |
| 27 | 27 | ||
| 28 | int GetNumTotalAttributes() const { | 28 | int GetNumTotalAttributes() const { |