summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar bunnei2017-09-07 23:02:59 -0400
committerGravatar GitHub2017-09-07 23:02:59 -0400
commit11baa40d75d8a479bd738a05e91bba6f09856fe5 (patch)
tree2a7affcefd22d5332970c7a28a04c139aa9a6654
parentMerge pull request #2918 from jroweboy/remove-debug (diff)
parentpica/command_processor: build geometry pipeline and run geometry shader (diff)
downloadyuzu-11baa40d75d8a479bd738a05e91bba6f09856fe5.tar.gz
yuzu-11baa40d75d8a479bd738a05e91bba6f09856fe5.tar.xz
yuzu-11baa40d75d8a479bd738a05e91bba6f09856fe5.zip
Merge pull request #2865 from wwylele/gs++
PICA: implemented geometry shader
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/command_processor.cpp54
-rw-r--r--src/video_core/geometry_pipeline.cpp274
-rw-r--r--src/video_core/geometry_pipeline.h49
-rw-r--r--src/video_core/pica.cpp21
-rw-r--r--src/video_core/pica_state.h11
-rw-r--r--src/video_core/primitive_assembly.cpp15
-rw-r--r--src/video_core/primitive_assembly.h7
-rw-r--r--src/video_core/regs_pipeline.h34
-rw-r--r--src/video_core/regs_shader.h7
-rw-r--r--src/video_core/shader/shader.cpp41
-rw-r--r--src/video_core/shader/shader.h49
-rw-r--r--src/video_core/shader/shader_interpreter.cpp16
-rw-r--r--src/video_core/shader/shader_jit_x64_compiler.cpp49
-rw-r--r--src/video_core/shader/shader_jit_x64_compiler.h2
15 files changed, 594 insertions, 37 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index cffa4c952..82f47d8a9 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,6 +1,7 @@
1set(SRCS 1set(SRCS
2 command_processor.cpp 2 command_processor.cpp
3 debug_utils/debug_utils.cpp 3 debug_utils/debug_utils.cpp
4 geometry_pipeline.cpp
4 pica.cpp 5 pica.cpp
5 primitive_assembly.cpp 6 primitive_assembly.cpp
6 regs.cpp 7 regs.cpp
@@ -29,6 +30,7 @@ set(SRCS
29set(HEADERS 30set(HEADERS
30 command_processor.h 31 command_processor.h
31 debug_utils/debug_utils.h 32 debug_utils/debug_utils.h
33 geometry_pipeline.h
32 gpu_debugger.h 34 gpu_debugger.h
33 pica.h 35 pica.h
34 pica_state.h 36 pica_state.h
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index f98ca3302..fb65a3a0a 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -161,6 +161,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
161 161
162 case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): 162 case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index):
163 g_state.immediate.current_attribute = 0; 163 g_state.immediate.current_attribute = 0;
164 g_state.immediate.reset_geometry_pipeline = true;
164 default_attr_counter = 0; 165 default_attr_counter = 0;
165 break; 166 break;
166 167
@@ -234,16 +235,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
234 shader_engine->Run(g_state.vs, shader_unit); 235 shader_engine->Run(g_state.vs, shader_unit);
235 shader_unit.WriteOutput(regs.vs, output); 236 shader_unit.WriteOutput(regs.vs, output);
236 237
237 // Send to renderer 238 // Send to geometry pipeline
238 using Pica::Shader::OutputVertex; 239 if (g_state.immediate.reset_geometry_pipeline) {
239 auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, 240 g_state.geometry_pipeline.Reconfigure();
240 const OutputVertex& v2) { 241 g_state.immediate.reset_geometry_pipeline = false;
241 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); 242 }
242 }; 243 ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
243 244 g_state.geometry_pipeline.Setup(shader_engine);
244 g_state.primitive_assembler.SubmitVertex( 245 g_state.geometry_pipeline.SubmitVertex(output);
245 Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output),
246 AddTriangle);
247 } 246 }
248 } 247 }
249 } 248 }
@@ -321,8 +320,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
321 // The size has been tuned for optimal balance between hit-rate and the cost of lookup 320 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
322 const size_t VERTEX_CACHE_SIZE = 32; 321 const size_t VERTEX_CACHE_SIZE = 32;
323 std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; 322 std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
324 std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache; 323 std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
325 Shader::OutputVertex output_vertex; 324 Shader::AttributeBuffer vs_output;
326 325
327 unsigned int vertex_cache_pos = 0; 326 unsigned int vertex_cache_pos = 0;
328 vertex_cache_ids.fill(-1); 327 vertex_cache_ids.fill(-1);
@@ -332,6 +331,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
332 331
333 shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); 332 shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
334 333
334 g_state.geometry_pipeline.Reconfigure();
335 g_state.geometry_pipeline.Setup(shader_engine);
336 if (g_state.geometry_pipeline.NeedIndexInput())
337 ASSERT(is_indexed);
338
335 for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { 339 for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
336 // Indexed rendering doesn't use the start offset 340 // Indexed rendering doesn't use the start offset
337 unsigned int vertex = 341 unsigned int vertex =
@@ -345,6 +349,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
345 bool vertex_cache_hit = false; 349 bool vertex_cache_hit = false;
346 350
347 if (is_indexed) { 351 if (is_indexed) {
352 if (g_state.geometry_pipeline.NeedIndexInput()) {
353 g_state.geometry_pipeline.SubmitIndex(vertex);
354 continue;
355 }
356
348 if (g_debug_context && Pica::g_debug_context->recorder) { 357 if (g_debug_context && Pica::g_debug_context->recorder) {
349 int size = index_u16 ? 2 : 1; 358 int size = index_u16 ? 2 : 1;
350 memory_accesses.AddAccess(base_address + index_info.offset + size * index, 359 memory_accesses.AddAccess(base_address + index_info.offset + size * index,
@@ -353,7 +362,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
353 362
354 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { 363 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
355 if (vertex == vertex_cache_ids[i]) { 364 if (vertex == vertex_cache_ids[i]) {
356 output_vertex = vertex_cache[i]; 365 vs_output = vertex_cache[i];
357 vertex_cache_hit = true; 366 vertex_cache_hit = true;
358 break; 367 break;
359 } 368 }
@@ -362,7 +371,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
362 371
363 if (!vertex_cache_hit) { 372 if (!vertex_cache_hit) {
364 // Initialize data for the current vertex 373 // Initialize data for the current vertex
365 Shader::AttributeBuffer input, output{}; 374 Shader::AttributeBuffer input;
366 loader.LoadVertex(base_address, index, vertex, input, memory_accesses); 375 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
367 376
368 // Send to vertex shader 377 // Send to vertex shader
@@ -371,26 +380,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
371 (void*)&input); 380 (void*)&input);
372 shader_unit.LoadInput(regs.vs, input); 381 shader_unit.LoadInput(regs.vs, input);
373 shader_engine->Run(g_state.vs, shader_unit); 382 shader_engine->Run(g_state.vs, shader_unit);
374 shader_unit.WriteOutput(regs.vs, output); 383 shader_unit.WriteOutput(regs.vs, vs_output);
375
376 // Retrieve vertex from register data
377 output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output);
378 384
379 if (is_indexed) { 385 if (is_indexed) {
380 vertex_cache[vertex_cache_pos] = output_vertex; 386 vertex_cache[vertex_cache_pos] = vs_output;
381 vertex_cache_ids[vertex_cache_pos] = vertex; 387 vertex_cache_ids[vertex_cache_pos] = vertex;
382 vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; 388 vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
383 } 389 }
384 } 390 }
385 391
386 // Send to renderer 392 // Send to geometry pipeline
387 using Pica::Shader::OutputVertex; 393 g_state.geometry_pipeline.SubmitVertex(vs_output);
388 auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
389 const OutputVertex& v2) {
390 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
391 };
392
393 primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
394 } 394 }
395 395
396 for (auto& range : memory_accesses.ranges) { 396 for (auto& range : memory_accesses.ranges) {
diff --git a/src/video_core/geometry_pipeline.cpp b/src/video_core/geometry_pipeline.cpp
new file mode 100644
index 000000000..b146e2ecb
--- /dev/null
+++ b/src/video_core/geometry_pipeline.cpp
@@ -0,0 +1,274 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
#include <utility>
#include "video_core/geometry_pipeline.h"
#include "video_core/pica_state.h"
#include "video_core/regs.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
10
11namespace Pica {
12
13/// An attribute buffering interface for different pipeline modes
14class GeometryPipelineBackend {
15public:
16 virtual ~GeometryPipelineBackend() = default;
17
18 /// Checks if there is no incomplete data transfer
19 virtual bool IsEmpty() const = 0;
20
21 /// Checks if the pipeline needs a direct input from index buffer
22 virtual bool NeedIndexInput() const = 0;
23
24 /// Submits an index from index buffer
25 virtual void SubmitIndex(unsigned int val) = 0;
26
27 /**
28 * Submits vertex attributes
29 * @param input attributes of a vertex output from vertex shader
30 * @return if the buffer is full and the geometry shader should be invoked
31 */
32 virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0;
33};
34
35// In the Point mode, vertex attributes are sent to the input registers in the geometry shader unit.
36// The size of vertex shader outputs and geometry shader inputs are constants. Geometry shader is
37// invoked upon inputs buffer filled up by vertex shader outputs. For example, if we have a geometry
38// shader that takes 6 inputs, and the vertex shader outputs 2 attributes, it would take 3 vertices
39// for one geometry shader invocation.
40// TODO: what happens when the input size is not divisible by the output size?
41class GeometryPipeline_Point : public GeometryPipelineBackend {
42public:
43 GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) {
44 ASSERT(regs.pipeline.variable_primitive == 0);
45 ASSERT(regs.gs.input_to_uniform == 0);
46 vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
47 size_t gs_input_num = regs.gs.max_input_attribute_index + 1;
48 ASSERT(gs_input_num % vs_output_num == 0);
49 buffer_cur = attribute_buffer.attr;
50 buffer_end = attribute_buffer.attr + gs_input_num;
51 }
52
53 bool IsEmpty() const override {
54 return buffer_cur == attribute_buffer.attr;
55 }
56
57 bool NeedIndexInput() const override {
58 return false;
59 }
60
61 void SubmitIndex(unsigned int val) override {
62 UNREACHABLE();
63 }
64
65 bool SubmitVertex(const Shader::AttributeBuffer& input) override {
66 buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur);
67 if (buffer_cur == buffer_end) {
68 buffer_cur = attribute_buffer.attr;
69 unit.LoadInput(regs.gs, attribute_buffer);
70 return true;
71 }
72 return false;
73 }
74
75private:
76 const Regs& regs;
77 Shader::GSUnitState& unit;
78 Shader::AttributeBuffer attribute_buffer;
79 Math::Vec4<float24>* buffer_cur;
80 Math::Vec4<float24>* buffer_end;
81 unsigned int vs_output_num;
82};
83
84// In VariablePrimitive mode, vertex attributes are buffered into the uniform registers in the
85// geometry shader unit. The number of vertex is variable, which is specified by the first index
86// value in the batch. This mode is usually used for subdivision.
87class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend {
88public:
89 GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup)
90 : regs(regs), setup(setup) {
91 ASSERT(regs.pipeline.variable_primitive == 1);
92 ASSERT(regs.gs.input_to_uniform == 1);
93 vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
94 }
95
96 bool IsEmpty() const override {
97 return need_index;
98 }
99
100 bool NeedIndexInput() const override {
101 return need_index;
102 }
103
104 void SubmitIndex(unsigned int val) override {
105 DEBUG_ASSERT(need_index);
106
107 // The number of vertex input is put to the uniform register
108 float24 vertex_num = float24::FromFloat32(val);
109 setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num);
110
111 // The second uniform register and so on are used for receiving input vertices
112 buffer_cur = setup.uniforms.f + 1;
113
114 main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1;
115 total_vertex_num = val;
116 need_index = false;
117 }
118
119 bool SubmitVertex(const Shader::AttributeBuffer& input) override {
120 DEBUG_ASSERT(!need_index);
121 if (main_vertex_num != 0) {
122 // For main vertices, receive all attributes
123 buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur);
124 --main_vertex_num;
125 } else {
126 // For other vertices, only receive the first attribute (usually the position)
127 *(buffer_cur++) = input.attr[0];
128 }
129 --total_vertex_num;
130
131 if (total_vertex_num == 0) {
132 need_index = true;
133 return true;
134 }
135
136 return false;
137 }
138
139private:
140 bool need_index = true;
141 const Regs& regs;
142 Shader::ShaderSetup& setup;
143 unsigned int main_vertex_num;
144 unsigned int total_vertex_num;
145 Math::Vec4<float24>* buffer_cur;
146 unsigned int vs_output_num;
147};
148
149// In FixedPrimitive mode, vertex attributes are buffered into the uniform registers in the geometry
150// shader unit. The number of vertex per shader invocation is constant. This is usually used for
151// particle system.
152class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend {
153public:
154 GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup)
155 : regs(regs), setup(setup) {
156 ASSERT(regs.pipeline.variable_primitive == 0);
157 ASSERT(regs.gs.input_to_uniform == 1);
158 vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
159 ASSERT(vs_output_num == regs.pipeline.gs_config.stride_minus_1 + 1);
160 size_t vertex_num = regs.pipeline.gs_config.fixed_vertex_num_minus_1 + 1;
161 buffer_cur = buffer_begin = setup.uniforms.f + regs.pipeline.gs_config.start_index;
162 buffer_end = buffer_begin + vs_output_num * vertex_num;
163 }
164
165 bool IsEmpty() const override {
166 return buffer_cur == buffer_begin;
167 }
168
169 bool NeedIndexInput() const override {
170 return false;
171 }
172
173 void SubmitIndex(unsigned int val) override {
174 UNREACHABLE();
175 }
176
177 bool SubmitVertex(const Shader::AttributeBuffer& input) override {
178 buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur);
179 if (buffer_cur == buffer_end) {
180 buffer_cur = buffer_begin;
181 return true;
182 }
183 return false;
184 }
185
186private:
187 const Regs& regs;
188 Shader::ShaderSetup& setup;
189 Math::Vec4<float24>* buffer_begin;
190 Math::Vec4<float24>* buffer_cur;
191 Math::Vec4<float24>* buffer_end;
192 unsigned int vs_output_num;
193};
194
195GeometryPipeline::GeometryPipeline(State& state) : state(state) {}
196
197GeometryPipeline::~GeometryPipeline() = default;
198
199void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) {
200 this->vertex_handler = vertex_handler;
201}
202
203void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) {
204 if (!backend)
205 return;
206
207 this->shader_engine = shader_engine;
208 shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset);
209}
210
211void GeometryPipeline::Reconfigure() {
212 ASSERT(!backend || backend->IsEmpty());
213
214 if (state.regs.pipeline.use_gs == PipelineRegs::UseGS::No) {
215 backend = nullptr;
216 return;
217 }
218
219 ASSERT(state.regs.pipeline.use_gs == PipelineRegs::UseGS::Yes);
220
221 // The following assumes that when geometry shader is in use, the shader unit 3 is configured as
222 // a geometry shader unit.
223 // TODO: what happens if this is not true?
224 ASSERT(state.regs.pipeline.gs_unit_exclusive_configuration == 1);
225 ASSERT(state.regs.gs.shader_mode == ShaderRegs::ShaderMode::GS);
226
227 state.gs_unit.ConfigOutput(state.regs.gs);
228
229 ASSERT(state.regs.pipeline.vs_outmap_total_minus_1_a ==
230 state.regs.pipeline.vs_outmap_total_minus_1_b);
231
232 switch (state.regs.pipeline.gs_config.mode) {
233 case PipelineRegs::GSMode::Point:
234 backend = std::make_unique<GeometryPipeline_Point>(state.regs, state.gs_unit);
235 break;
236 case PipelineRegs::GSMode::VariablePrimitive:
237 backend = std::make_unique<GeometryPipeline_VariablePrimitive>(state.regs, state.gs);
238 break;
239 case PipelineRegs::GSMode::FixedPrimitive:
240 backend = std::make_unique<GeometryPipeline_FixedPrimitive>(state.regs, state.gs);
241 break;
242 default:
243 UNREACHABLE();
244 }
245}
246
247bool GeometryPipeline::NeedIndexInput() const {
248 if (!backend)
249 return false;
250 return backend->NeedIndexInput();
251}
252
253void GeometryPipeline::SubmitIndex(unsigned int val) {
254 backend->SubmitIndex(val);
255}
256
257void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) {
258 if (!backend) {
259 // No backend means the geometry shader is disabled, so we send the vertex shader output
260 // directly to the primitive assembler.
261 vertex_handler(input);
262 } else {
263 if (backend->SubmitVertex(input)) {
264 shader_engine->Run(state.gs, state.gs_unit);
265
266 // The uniform b15 is set to true after every geometry shader invocation. This is useful
267 // for the shader to know if this is the first invocation in a batch, if the program set
268 // b15 to false first.
269 state.gs.uniforms.b[15] = true;
270 }
271 }
272}
273
274} // namespace Pica
diff --git a/src/video_core/geometry_pipeline.h b/src/video_core/geometry_pipeline.h
new file mode 100644
index 000000000..91fdd3192
--- /dev/null
+++ b/src/video_core/geometry_pipeline.h
@@ -0,0 +1,49 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include "video_core/shader/shader.h"
9
10namespace Pica {
11
12struct State;
13
14class GeometryPipelineBackend;
15
16/// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler
17class GeometryPipeline {
18public:
19 explicit GeometryPipeline(State& state);
20 ~GeometryPipeline();
21
22 /// Sets the handler for receiving vertex outputs from vertex shader
23 void SetVertexHandler(Shader::VertexHandler vertex_handler);
24
25 /**
26 * Setup the geometry shader unit if it is in use
27 * @param shader_engine the shader engine for the geometry shader to run
28 */
29 void Setup(Shader::ShaderEngine* shader_engine);
30
31 /// Reconfigures the pipeline according to current register settings
32 void Reconfigure();
33
34 /// Checks if the pipeline needs a direct input from index buffer
35 bool NeedIndexInput() const;
36
37 /// Submits an index from index buffer. Call this only when NeedIndexInput returns true
38 void SubmitIndex(unsigned int val);
39
40 /// Submits vertex attributes output from vertex shader
41 void SubmitVertex(const Shader::AttributeBuffer& input);
42
43private:
44 Shader::VertexHandler vertex_handler;
45 Shader::ShaderEngine* shader_engine;
46 std::unique_ptr<GeometryPipelineBackend> backend;
47 State& state;
48};
49} // namespace Pica
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index b95148a6a..218e06883 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -3,9 +3,11 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6#include "video_core/geometry_pipeline.h"
6#include "video_core/pica.h" 7#include "video_core/pica.h"
7#include "video_core/pica_state.h" 8#include "video_core/pica_state.h"
8#include "video_core/regs_pipeline.h" 9#include "video_core/renderer_base.h"
10#include "video_core/video_core.h"
9 11
10namespace Pica { 12namespace Pica {
11 13
@@ -24,6 +26,23 @@ void Zero(T& o) {
24 memset(&o, 0, sizeof(o)); 26 memset(&o, 0, sizeof(o));
25} 27}
26 28
29State::State() : geometry_pipeline(*this) {
30 auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) {
31 using Pica::Shader::OutputVertex;
32 auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1,
33 const OutputVertex& v2) {
34 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
35 };
36 primitive_assembler.SubmitVertex(
37 Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle);
38 };
39
40 auto SetWinding = [this]() { primitive_assembler.SetWinding(); };
41
42 g_state.gs_unit.SetVertexHandler(SubmitVertex, SetWinding);
43 g_state.geometry_pipeline.SetVertexHandler(SubmitVertex);
44}
45
27void State::Reset() { 46void State::Reset() {
28 Zero(regs); 47 Zero(regs);
29 Zero(vs); 48 Zero(vs);
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 864a2c9e6..c6634a0bc 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -8,6 +8,7 @@
8#include "common/bit_field.h" 8#include "common/bit_field.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/vector_math.h" 10#include "common/vector_math.h"
11#include "video_core/geometry_pipeline.h"
11#include "video_core/primitive_assembly.h" 12#include "video_core/primitive_assembly.h"
12#include "video_core/regs.h" 13#include "video_core/regs.h"
13#include "video_core/shader/shader.h" 14#include "video_core/shader/shader.h"
@@ -16,6 +17,7 @@ namespace Pica {
16 17
17/// Struct used to describe current Pica state 18/// Struct used to describe current Pica state
18struct State { 19struct State {
20 State();
19 void Reset(); 21 void Reset();
20 22
21 /// Pica registers 23 /// Pica registers
@@ -137,8 +139,17 @@ struct State {
137 Shader::AttributeBuffer input_vertex; 139 Shader::AttributeBuffer input_vertex;
138 // Index of the next attribute to be loaded into `input_vertex`. 140 // Index of the next attribute to be loaded into `input_vertex`.
139 u32 current_attribute = 0; 141 u32 current_attribute = 0;
142 // Indicates the immediate mode just started and the geometry pipeline needs to reconfigure
143 bool reset_geometry_pipeline = true;
140 } immediate; 144 } immediate;
141 145
146 // the geometry shader needs to be kept in the global state because some shaders rely on
147 // preserved register value across shader invocation.
148 // TODO: also bring the three vertex shader units here and implement the shader scheduler.
149 Shader::GSUnitState gs_unit;
150
151 GeometryPipeline geometry_pipeline;
152
142 // This is constructed with a dummy triangle topology 153 // This is constructed with a dummy triangle topology
143 PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; 154 PrimitiveAssembler<Shader::OutputVertex> primitive_assembler;
144}; 155};
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index acd2ac5e2..9c3dd4cab 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -17,15 +17,18 @@ template <typename VertexType>
17void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, 17void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx,
18 TriangleHandler triangle_handler) { 18 TriangleHandler triangle_handler) {
19 switch (topology) { 19 switch (topology) {
20 // TODO: Figure out what's different with TriangleTopology::Shader.
21 case PipelineRegs::TriangleTopology::List: 20 case PipelineRegs::TriangleTopology::List:
22 case PipelineRegs::TriangleTopology::Shader: 21 case PipelineRegs::TriangleTopology::Shader:
23 if (buffer_index < 2) { 22 if (buffer_index < 2) {
24 buffer[buffer_index++] = vtx; 23 buffer[buffer_index++] = vtx;
25 } else { 24 } else {
26 buffer_index = 0; 25 buffer_index = 0;
27 26 if (topology == PipelineRegs::TriangleTopology::Shader && winding) {
28 triangle_handler(buffer[0], buffer[1], vtx); 27 triangle_handler(buffer[1], buffer[0], vtx);
28 winding = false;
29 } else {
30 triangle_handler(buffer[0], buffer[1], vtx);
31 }
29 } 32 }
30 break; 33 break;
31 34
@@ -51,9 +54,15 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx,
51} 54}
52 55
53template <typename VertexType> 56template <typename VertexType>
57void PrimitiveAssembler<VertexType>::SetWinding() {
58 winding = true;
59}
60
61template <typename VertexType>
54void PrimitiveAssembler<VertexType>::Reset() { 62void PrimitiveAssembler<VertexType>::Reset() {
55 buffer_index = 0; 63 buffer_index = 0;
56 strip_ready = false; 64 strip_ready = false;
65 winding = false;
57} 66}
58 67
59template <typename VertexType> 68template <typename VertexType>
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h
index e8eccdf27..12de8e3b9 100644
--- a/src/video_core/primitive_assembly.h
+++ b/src/video_core/primitive_assembly.h
@@ -30,6 +30,12 @@ struct PrimitiveAssembler {
30 void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); 30 void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler);
31 31
32 /** 32 /**
33 * Invert the vertex order of the next triangle. Called by geometry shader emitter.
34 * This only takes effect for TriangleTopology::Shader.
35 */
36 void SetWinding();
37
38 /**
33 * Resets the internal state of the PrimitiveAssembler. 39 * Resets the internal state of the PrimitiveAssembler.
34 */ 40 */
35 void Reset(); 41 void Reset();
@@ -45,6 +51,7 @@ private:
45 int buffer_index; 51 int buffer_index;
46 VertexType buffer[2]; 52 VertexType buffer[2];
47 bool strip_ready = false; 53 bool strip_ready = false;
54 bool winding = false;
48}; 55};
49 56
50} // namespace 57} // namespace
diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h
index 8b6369297..e78c3e331 100644
--- a/src/video_core/regs_pipeline.h
+++ b/src/video_core/regs_pipeline.h
@@ -147,7 +147,15 @@ struct PipelineRegs {
147 // Number of vertices to render 147 // Number of vertices to render
148 u32 num_vertices; 148 u32 num_vertices;
149 149
150 INSERT_PADDING_WORDS(0x1); 150 enum class UseGS : u32 {
151 No = 0,
152 Yes = 2,
153 };
154
155 union {
156 BitField<0, 2, UseGS> use_gs;
157 BitField<31, 1, u32> variable_primitive;
158 };
151 159
152 // The index of the first vertex to render 160 // The index of the first vertex to render
153 u32 vertex_offset; 161 u32 vertex_offset;
@@ -218,7 +226,29 @@ struct PipelineRegs {
218 226
219 GPUMode gpu_mode; 227 GPUMode gpu_mode;
220 228
221 INSERT_PADDING_WORDS(0x18); 229 INSERT_PADDING_WORDS(0x4);
230 BitField<0, 4, u32> vs_outmap_total_minus_1_a;
231 INSERT_PADDING_WORDS(0x6);
232 BitField<0, 4, u32> vs_outmap_total_minus_1_b;
233
234 enum class GSMode : u32 {
235 Point = 0,
236 VariablePrimitive = 1,
237 FixedPrimitive = 2,
238 };
239
240 union {
241 BitField<0, 8, GSMode> mode;
242 BitField<8, 4, u32> fixed_vertex_num_minus_1;
243 BitField<12, 4, u32> stride_minus_1;
244 BitField<16, 4, u32> start_index;
245 } gs_config;
246
247 INSERT_PADDING_WORDS(0x1);
248
249 u32 variable_vertex_main_num_minus_1;
250
251 INSERT_PADDING_WORDS(0x9);
222 252
223 enum class TriangleTopology : u32 { 253 enum class TriangleTopology : u32 {
224 List = 0, 254 List = 0,
diff --git a/src/video_core/regs_shader.h b/src/video_core/regs_shader.h
index ddb1ee451..c15d4d162 100644
--- a/src/video_core/regs_shader.h
+++ b/src/video_core/regs_shader.h
@@ -24,9 +24,16 @@ struct ShaderRegs {
24 24
25 INSERT_PADDING_WORDS(0x4); 25 INSERT_PADDING_WORDS(0x4);
26 26
27 enum ShaderMode {
28 GS = 0x08,
29 VS = 0xA0,
30 };
31
27 union { 32 union {
28 // Number of input attributes to shader unit - 1 33 // Number of input attributes to shader unit - 1
29 BitField<0, 4, u32> max_input_attribute_index; 34 BitField<0, 4, u32> max_input_attribute_index;
35 BitField<8, 8, u32> input_to_uniform;
36 BitField<24, 8, ShaderMode> shader_mode;
30 }; 37 };
31 38
32 // Offset to shader program entry point (in words) 39 // Offset to shader program entry point (in words)
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 67ed19ba8..e9063e616 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -21,7 +21,8 @@ namespace Pica {
21 21
22namespace Shader { 22namespace Shader {
23 23
24OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) { 24OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
25 const AttributeBuffer& input) {
25 // Setup output data 26 // Setup output data
26 union { 27 union {
27 OutputVertex ret{}; 28 OutputVertex ret{};
@@ -82,6 +83,44 @@ void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) {
82 } 83 }
83} 84}
84 85
86UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {}
87
88GSEmitter::GSEmitter() {
89 handlers = new Handlers;
90}
91
92GSEmitter::~GSEmitter() {
93 delete handlers;
94}
95
96void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) {
97 ASSERT(vertex_id < 3);
98 std::copy(std::begin(vertex), std::end(vertex), buffer[vertex_id].begin());
99 if (prim_emit) {
100 if (winding)
101 handlers->winding_setter();
102 for (size_t i = 0; i < buffer.size(); ++i) {
103 AttributeBuffer output;
104 unsigned int output_i = 0;
105 for (unsigned int reg : Common::BitSet<u32>(output_mask)) {
106 output.attr[output_i++] = buffer[i][reg];
107 }
108 handlers->vertex_handler(output);
109 }
110 }
111}
112
113GSUnitState::GSUnitState() : UnitState(&emitter) {}
114
115void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) {
116 emitter.handlers->vertex_handler = std::move(vertex_handler);
117 emitter.handlers->winding_setter = std::move(winding_setter);
118}
119
120void GSUnitState::ConfigOutput(const ShaderRegs& config) {
121 emitter.output_mask = config.output_mask;
122}
123
85MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); 124MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
86 125
87#ifdef ARCHITECTURE_x86_64 126#ifdef ARCHITECTURE_x86_64
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index e156f6aef..a3789da01 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <functional>
9#include <type_traits> 10#include <type_traits>
10#include <nihstro/shader_bytecode.h> 11#include <nihstro/shader_bytecode.h>
11#include "common/assert.h" 12#include "common/assert.h"
@@ -31,6 +32,12 @@ struct AttributeBuffer {
31 alignas(16) Math::Vec4<float24> attr[16]; 32 alignas(16) Math::Vec4<float24> attr[16];
32}; 33};
33 34
35/// Handler type for receiving vertex outputs from vertex shader or geometry shader
36using VertexHandler = std::function<void(const AttributeBuffer&)>;
37
38/// Handler type for signaling to invert the vertex order of the next triangle
39using WindingSetter = std::function<void()>;
40
34struct OutputVertex { 41struct OutputVertex {
35 Math::Vec4<float24> pos; 42 Math::Vec4<float24> pos;
36 Math::Vec4<float24> quat; 43 Math::Vec4<float24> quat;
@@ -43,7 +50,8 @@ struct OutputVertex {
43 INSERT_PADDING_WORDS(1); 50 INSERT_PADDING_WORDS(1);
44 Math::Vec2<float24> tc2; 51 Math::Vec2<float24> tc2;
45 52
46 static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output); 53 static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs,
54 const AttributeBuffer& output);
47}; 55};
48#define ASSERT_POS(var, pos) \ 56#define ASSERT_POS(var, pos) \
49 static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ 57 static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \
@@ -61,12 +69,36 @@ static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
61static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); 69static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size");
62 70
63/** 71/**
72 * This structure contains state information for primitive emitting in geometry shader.
73 */
74struct GSEmitter {
75 std::array<std::array<Math::Vec4<float24>, 16>, 3> buffer;
76 u8 vertex_id;
77 bool prim_emit;
78 bool winding;
79 u32 output_mask;
80
81 // Function objects are hidden behind a raw pointer to make the structure standard layout type,
82 // for JIT to use offsetof to access other members.
83 struct Handlers {
84 VertexHandler vertex_handler;
85 WindingSetter winding_setter;
86 } * handlers;
87
88 GSEmitter();
89 ~GSEmitter();
90 void Emit(Math::Vec4<float24> (&vertex)[16]);
91};
92static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type");
93
94/**
64 * This structure contains the state information that needs to be unique for a shader unit. The 3DS 95 * This structure contains the state information that needs to be unique for a shader unit. The 3DS
65 * has four shader units that process shaders in parallel. At the present, Citra only implements a 96 * has four shader units that process shaders in parallel. At the present, Citra only implements a
66 * single shader unit that processes all shaders serially. Putting the state information in a struct 97 * single shader unit that processes all shaders serially. Putting the state information in a struct
67 * here will make it easier for us to parallelize the shader processing later. 98 * here will make it easier for us to parallelize the shader processing later.
68 */ 99 */
69struct UnitState { 100struct UnitState {
101 explicit UnitState(GSEmitter* emitter = nullptr);
70 struct Registers { 102 struct Registers {
71 // The registers are accessed by the shader JIT using SSE instructions, and are therefore 103 // The registers are accessed by the shader JIT using SSE instructions, and are therefore
72 // required to be 16-byte aligned. 104 // required to be 16-byte aligned.
@@ -82,6 +114,8 @@ struct UnitState {
82 // TODO: How many bits do these actually have? 114 // TODO: How many bits do these actually have?
83 s32 address_registers[3]; 115 s32 address_registers[3];
84 116
117 GSEmitter* emitter_ptr;
118
85 static size_t InputOffset(const SourceRegister& reg) { 119 static size_t InputOffset(const SourceRegister& reg) {
86 switch (reg.GetRegisterType()) { 120 switch (reg.GetRegisterType()) {
87 case RegisterType::Input: 121 case RegisterType::Input:
@@ -125,6 +159,19 @@ struct UnitState {
125 void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); 159 void WriteOutput(const ShaderRegs& config, AttributeBuffer& output);
126}; 160};
127 161
162/**
163 * This is an extended shader unit state that represents the special unit that can run both vertex
164 * shader and geometry shader. It contains an additional primitive emitter and utilities for
165 * geometry shader.
166 */
167struct GSUnitState : public UnitState {
168 GSUnitState();
169 void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter);
170 void ConfigOutput(const ShaderRegs& config);
171
172 GSEmitter emitter;
173};
174
128struct ShaderSetup { 175struct ShaderSetup {
129 struct { 176 struct {
130 // The float uniforms are accessed by the shader JIT using SSE instructions, and are 177 // The float uniforms are accessed by the shader JIT using SSE instructions, and are
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 206c0978a..9d4da4904 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -636,6 +636,22 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
636 break; 636 break;
637 } 637 }
638 638
639 case OpCode::Id::EMIT: {
640 GSEmitter* emitter = state.emitter_ptr;
641 ASSERT_MSG(emitter, "Execute EMIT on VS");
642 emitter->Emit(state.registers.output);
643 break;
644 }
645
646 case OpCode::Id::SETEMIT: {
647 GSEmitter* emitter = state.emitter_ptr;
648 ASSERT_MSG(emitter, "Execute SETEMIT on VS");
649 emitter->vertex_id = instr.setemit.vertex_id;
650 emitter->prim_emit = instr.setemit.prim_emit != 0;
651 emitter->winding = instr.setemit.winding != 0;
652 break;
653 }
654
639 default: 655 default:
640 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", 656 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
641 (int)instr.opcode.Value().EffectiveOpCode(), 657 (int)instr.opcode.Value().EffectiveOpCode(),
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp
index 42a57aab1..1b31623bd 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.cpp
+++ b/src/video_core/shader/shader_jit_x64_compiler.cpp
@@ -75,8 +75,8 @@ const JitFunction instr_table[64] = {
75 &JitShader::Compile_IF, // ifu 75 &JitShader::Compile_IF, // ifu
76 &JitShader::Compile_IF, // ifc 76 &JitShader::Compile_IF, // ifc
77 &JitShader::Compile_LOOP, // loop 77 &JitShader::Compile_LOOP, // loop
78 nullptr, // emit 78 &JitShader::Compile_EMIT, // emit
79 nullptr, // sete 79 &JitShader::Compile_SETE, // sete
80 &JitShader::Compile_JMP, // jmpc 80 &JitShader::Compile_JMP, // jmpc
81 &JitShader::Compile_JMP, // jmpu 81 &JitShader::Compile_JMP, // jmpu
82 &JitShader::Compile_CMP, // cmp 82 &JitShader::Compile_CMP, // cmp
@@ -772,6 +772,51 @@ void JitShader::Compile_JMP(Instruction instr) {
772 } 772 }
773} 773}
774 774
775static void Emit(GSEmitter* emitter, Math::Vec4<float24> (*output)[16]) {
776 emitter->Emit(*output);
777}
778
779void JitShader::Compile_EMIT(Instruction instr) {
780 Label have_emitter, end;
781 mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]);
782 test(rax, rax);
783 jnz(have_emitter);
784
785 ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
786 mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute EMIT on VS"));
787 CallFarFunction(*this, LogCritical);
788 ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
789 jmp(end);
790
791 L(have_emitter);
792 ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
793 mov(ABI_PARAM1, rax);
794 mov(ABI_PARAM2, STATE);
795 add(ABI_PARAM2, static_cast<Xbyak::uint32>(offsetof(UnitState, registers.output)));
796 CallFarFunction(*this, Emit);
797 ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
798 L(end);
799}
800
801void JitShader::Compile_SETE(Instruction instr) {
802 Label have_emitter, end;
803 mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]);
804 test(rax, rax);
805 jnz(have_emitter);
806
807 ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
808 mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute SETEMIT on VS"));
809 CallFarFunction(*this, LogCritical);
810 ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
811 jmp(end);
812
813 L(have_emitter);
814 mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id);
815 mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit);
816 mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding);
817 L(end);
818}
819
775void JitShader::Compile_Block(unsigned end) { 820void JitShader::Compile_Block(unsigned end) {
776 while (program_counter < end) { 821 while (program_counter < end) {
777 Compile_NextInstr(); 822 Compile_NextInstr();
diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h
index 31af0ca48..4aee56b1d 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.h
+++ b/src/video_core/shader/shader_jit_x64_compiler.h
@@ -66,6 +66,8 @@ public:
66 void Compile_JMP(Instruction instr); 66 void Compile_JMP(Instruction instr);
67 void Compile_CMP(Instruction instr); 67 void Compile_CMP(Instruction instr);
68 void Compile_MAD(Instruction instr); 68 void Compile_MAD(Instruction instr);
69 void Compile_EMIT(Instruction instr);
70 void Compile_SETE(Instruction instr);
69 71
70private: 72private:
71 void Compile_Block(unsigned end); 73 void Compile_Block(unsigned end);