diff options
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 54 | ||||
| -rw-r--r-- | src/video_core/geometry_pipeline.cpp | 274 | ||||
| -rw-r--r-- | src/video_core/geometry_pipeline.h | 49 | ||||
| -rw-r--r-- | src/video_core/pica.cpp | 21 | ||||
| -rw-r--r-- | src/video_core/pica_state.h | 11 | ||||
| -rw-r--r-- | src/video_core/primitive_assembly.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/primitive_assembly.h | 7 | ||||
| -rw-r--r-- | src/video_core/regs_pipeline.h | 34 | ||||
| -rw-r--r-- | src/video_core/regs_shader.h | 7 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 41 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 49 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64_compiler.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64_compiler.h | 2 |
15 files changed, 594 insertions, 37 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cffa4c952..82f47d8a9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | set(SRCS | 1 | set(SRCS |
| 2 | command_processor.cpp | 2 | command_processor.cpp |
| 3 | debug_utils/debug_utils.cpp | 3 | debug_utils/debug_utils.cpp |
| 4 | geometry_pipeline.cpp | ||
| 4 | pica.cpp | 5 | pica.cpp |
| 5 | primitive_assembly.cpp | 6 | primitive_assembly.cpp |
| 6 | regs.cpp | 7 | regs.cpp |
| @@ -29,6 +30,7 @@ set(SRCS | |||
| 29 | set(HEADERS | 30 | set(HEADERS |
| 30 | command_processor.h | 31 | command_processor.h |
| 31 | debug_utils/debug_utils.h | 32 | debug_utils/debug_utils.h |
| 33 | geometry_pipeline.h | ||
| 32 | gpu_debugger.h | 34 | gpu_debugger.h |
| 33 | pica.h | 35 | pica.h |
| 34 | pica_state.h | 36 | pica_state.h |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index f98ca3302..fb65a3a0a 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -161,6 +161,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 161 | 161 | ||
| 162 | case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): | 162 | case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): |
| 163 | g_state.immediate.current_attribute = 0; | 163 | g_state.immediate.current_attribute = 0; |
| 164 | g_state.immediate.reset_geometry_pipeline = true; | ||
| 164 | default_attr_counter = 0; | 165 | default_attr_counter = 0; |
| 165 | break; | 166 | break; |
| 166 | 167 | ||
| @@ -234,16 +235,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 234 | shader_engine->Run(g_state.vs, shader_unit); | 235 | shader_engine->Run(g_state.vs, shader_unit); |
| 235 | shader_unit.WriteOutput(regs.vs, output); | 236 | shader_unit.WriteOutput(regs.vs, output); |
| 236 | 237 | ||
| 237 | // Send to renderer | 238 | // Send to geometry pipeline |
| 238 | using Pica::Shader::OutputVertex; | 239 | if (g_state.immediate.reset_geometry_pipeline) { |
| 239 | auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, | 240 | g_state.geometry_pipeline.Reconfigure(); |
| 240 | const OutputVertex& v2) { | 241 | g_state.immediate.reset_geometry_pipeline = false; |
| 241 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | 242 | } |
| 242 | }; | 243 | ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); |
| 243 | 244 | g_state.geometry_pipeline.Setup(shader_engine); | |
| 244 | g_state.primitive_assembler.SubmitVertex( | 245 | g_state.geometry_pipeline.SubmitVertex(output); |
| 245 | Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output), | ||
| 246 | AddTriangle); | ||
| 247 | } | 246 | } |
| 248 | } | 247 | } |
| 249 | } | 248 | } |
| @@ -321,8 +320,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 321 | // The size has been tuned for optimal balance between hit-rate and the cost of lookup | 320 | // The size has been tuned for optimal balance between hit-rate and the cost of lookup |
| 322 | const size_t VERTEX_CACHE_SIZE = 32; | 321 | const size_t VERTEX_CACHE_SIZE = 32; |
| 323 | std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; | 322 | std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; |
| 324 | std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache; | 323 | std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache; |
| 325 | Shader::OutputVertex output_vertex; | 324 | Shader::AttributeBuffer vs_output; |
| 326 | 325 | ||
| 327 | unsigned int vertex_cache_pos = 0; | 326 | unsigned int vertex_cache_pos = 0; |
| 328 | vertex_cache_ids.fill(-1); | 327 | vertex_cache_ids.fill(-1); |
| @@ -332,6 +331,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 332 | 331 | ||
| 333 | shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); | 332 | shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); |
| 334 | 333 | ||
| 334 | g_state.geometry_pipeline.Reconfigure(); | ||
| 335 | g_state.geometry_pipeline.Setup(shader_engine); | ||
| 336 | if (g_state.geometry_pipeline.NeedIndexInput()) | ||
| 337 | ASSERT(is_indexed); | ||
| 338 | |||
| 335 | for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { | 339 | for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { |
| 336 | // Indexed rendering doesn't use the start offset | 340 | // Indexed rendering doesn't use the start offset |
| 337 | unsigned int vertex = | 341 | unsigned int vertex = |
| @@ -345,6 +349,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 345 | bool vertex_cache_hit = false; | 349 | bool vertex_cache_hit = false; |
| 346 | 350 | ||
| 347 | if (is_indexed) { | 351 | if (is_indexed) { |
| 352 | if (g_state.geometry_pipeline.NeedIndexInput()) { | ||
| 353 | g_state.geometry_pipeline.SubmitIndex(vertex); | ||
| 354 | continue; | ||
| 355 | } | ||
| 356 | |||
| 348 | if (g_debug_context && Pica::g_debug_context->recorder) { | 357 | if (g_debug_context && Pica::g_debug_context->recorder) { |
| 349 | int size = index_u16 ? 2 : 1; | 358 | int size = index_u16 ? 2 : 1; |
| 350 | memory_accesses.AddAccess(base_address + index_info.offset + size * index, | 359 | memory_accesses.AddAccess(base_address + index_info.offset + size * index, |
| @@ -353,7 +362,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 353 | 362 | ||
| 354 | for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { | 363 | for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { |
| 355 | if (vertex == vertex_cache_ids[i]) { | 364 | if (vertex == vertex_cache_ids[i]) { |
| 356 | output_vertex = vertex_cache[i]; | 365 | vs_output = vertex_cache[i]; |
| 357 | vertex_cache_hit = true; | 366 | vertex_cache_hit = true; |
| 358 | break; | 367 | break; |
| 359 | } | 368 | } |
| @@ -362,7 +371,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 362 | 371 | ||
| 363 | if (!vertex_cache_hit) { | 372 | if (!vertex_cache_hit) { |
| 364 | // Initialize data for the current vertex | 373 | // Initialize data for the current vertex |
| 365 | Shader::AttributeBuffer input, output{}; | 374 | Shader::AttributeBuffer input; |
| 366 | loader.LoadVertex(base_address, index, vertex, input, memory_accesses); | 375 | loader.LoadVertex(base_address, index, vertex, input, memory_accesses); |
| 367 | 376 | ||
| 368 | // Send to vertex shader | 377 | // Send to vertex shader |
| @@ -371,26 +380,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 371 | (void*)&input); | 380 | (void*)&input); |
| 372 | shader_unit.LoadInput(regs.vs, input); | 381 | shader_unit.LoadInput(regs.vs, input); |
| 373 | shader_engine->Run(g_state.vs, shader_unit); | 382 | shader_engine->Run(g_state.vs, shader_unit); |
| 374 | shader_unit.WriteOutput(regs.vs, output); | 383 | shader_unit.WriteOutput(regs.vs, vs_output); |
| 375 | |||
| 376 | // Retrieve vertex from register data | ||
| 377 | output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output); | ||
| 378 | 384 | ||
| 379 | if (is_indexed) { | 385 | if (is_indexed) { |
| 380 | vertex_cache[vertex_cache_pos] = output_vertex; | 386 | vertex_cache[vertex_cache_pos] = vs_output; |
| 381 | vertex_cache_ids[vertex_cache_pos] = vertex; | 387 | vertex_cache_ids[vertex_cache_pos] = vertex; |
| 382 | vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; | 388 | vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; |
| 383 | } | 389 | } |
| 384 | } | 390 | } |
| 385 | 391 | ||
| 386 | // Send to renderer | 392 | // Send to geometry pipeline |
| 387 | using Pica::Shader::OutputVertex; | 393 | g_state.geometry_pipeline.SubmitVertex(vs_output); |
| 388 | auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, | ||
| 389 | const OutputVertex& v2) { | ||
| 390 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||
| 391 | }; | ||
| 392 | |||
| 393 | primitive_assembler.SubmitVertex(output_vertex, AddTriangle); | ||
| 394 | } | 394 | } |
| 395 | 395 | ||
| 396 | for (auto& range : memory_accesses.ranges) { | 396 | for (auto& range : memory_accesses.ranges) { |
diff --git a/src/video_core/geometry_pipeline.cpp b/src/video_core/geometry_pipeline.cpp new file mode 100644 index 000000000..b146e2ecb --- /dev/null +++ b/src/video_core/geometry_pipeline.cpp | |||
| @@ -0,0 +1,274 @@ | |||
| 1 | // Copyright 2017 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/geometry_pipeline.h" | ||
| 6 | #include "video_core/pica_state.h" | ||
| 7 | #include "video_core/regs.h" | ||
| 8 | #include "video_core/renderer_base.h" | ||
| 9 | #include "video_core/video_core.h" | ||
| 10 | |||
| 11 | namespace Pica { | ||
| 12 | |||
| 13 | /// An attribute buffering interface for different pipeline modes | ||
| 14 | class GeometryPipelineBackend { | ||
| 15 | public: | ||
| 16 | virtual ~GeometryPipelineBackend() = default; | ||
| 17 | |||
| 18 | /// Checks if there is no incomplete data transfer | ||
| 19 | virtual bool IsEmpty() const = 0; | ||
| 20 | |||
| 21 | /// Checks if the pipeline needs a direct input from index buffer | ||
| 22 | virtual bool NeedIndexInput() const = 0; | ||
| 23 | |||
| 24 | /// Submits an index from index buffer | ||
| 25 | virtual void SubmitIndex(unsigned int val) = 0; | ||
| 26 | |||
| 27 | /** | ||
| 28 | * Submits vertex attributes | ||
| 29 | * @param input attributes of a vertex output from vertex shader | ||
| 30 | * @return if the buffer is full and the geometry shader should be invoked | ||
| 31 | */ | ||
| 32 | virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0; | ||
| 33 | }; | ||
| 34 | |||
| 35 | // In Point mode, vertex attributes are sent to the input registers of the geometry shader unit. | ||
| 36 | // The sizes of the vertex shader output and the geometry shader input are constant. The geometry | ||
| 37 | // shader is invoked once its input buffer is filled by vertex shader outputs. For example, if a | ||
| 38 | // geometry shader takes 6 inputs and the vertex shader outputs 2 attributes, it takes 3 vertices | ||
| 39 | // for one geometry shader invocation. | ||
| 40 | // TODO: what happens when the input size is not divisible by the output size? | ||
| 41 | class GeometryPipeline_Point : public GeometryPipelineBackend { | ||
| 42 | public: | ||
| 43 | GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) { | ||
| 44 | ASSERT(regs.pipeline.variable_primitive == 0); | ||
| 45 | ASSERT(regs.gs.input_to_uniform == 0); | ||
| 46 | vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||
| 47 | size_t gs_input_num = regs.gs.max_input_attribute_index + 1; | ||
| 48 | ASSERT(gs_input_num % vs_output_num == 0); | ||
| 49 | buffer_cur = attribute_buffer.attr; | ||
| 50 | buffer_end = attribute_buffer.attr + gs_input_num; | ||
| 51 | } | ||
| 52 | |||
| 53 | bool IsEmpty() const override { | ||
| 54 | return buffer_cur == attribute_buffer.attr; | ||
| 55 | } | ||
| 56 | |||
| 57 | bool NeedIndexInput() const override { | ||
| 58 | return false; | ||
| 59 | } | ||
| 60 | |||
| 61 | void SubmitIndex(unsigned int val) override { | ||
| 62 | UNREACHABLE(); | ||
| 63 | } | ||
| 64 | |||
| 65 | bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||
| 66 | buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||
| 67 | if (buffer_cur == buffer_end) { | ||
| 68 | buffer_cur = attribute_buffer.attr; | ||
| 69 | unit.LoadInput(regs.gs, attribute_buffer); | ||
| 70 | return true; | ||
| 71 | } | ||
| 72 | return false; | ||
| 73 | } | ||
| 74 | |||
| 75 | private: | ||
| 76 | const Regs& regs; | ||
| 77 | Shader::GSUnitState& unit; | ||
| 78 | Shader::AttributeBuffer attribute_buffer; | ||
| 79 | Math::Vec4<float24>* buffer_cur; | ||
| 80 | Math::Vec4<float24>* buffer_end; | ||
| 81 | unsigned int vs_output_num; | ||
| 82 | }; | ||
| 83 | |||
| 84 | // In VariablePrimitive mode, vertex attributes are buffered into the uniform registers in the | ||
| 85 | // geometry shader unit. The number of vertices is variable, and is specified by the first index | ||
| 86 | // value in the batch. This mode is usually used for subdivision. | ||
| 87 | class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend { | ||
| 88 | public: | ||
| 89 | GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup) | ||
| 90 | : regs(regs), setup(setup) { | ||
| 91 | ASSERT(regs.pipeline.variable_primitive == 1); | ||
| 92 | ASSERT(regs.gs.input_to_uniform == 1); | ||
| 93 | vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||
| 94 | } | ||
| 95 | |||
| 96 | bool IsEmpty() const override { | ||
| 97 | return need_index; | ||
| 98 | } | ||
| 99 | |||
| 100 | bool NeedIndexInput() const override { | ||
| 101 | return need_index; | ||
| 102 | } | ||
| 103 | |||
| 104 | void SubmitIndex(unsigned int val) override { | ||
| 105 | DEBUG_ASSERT(need_index); | ||
| 106 | |||
| 107 | // The number of input vertices is written to the first uniform register | ||
| 108 | float24 vertex_num = float24::FromFloat32(val); | ||
| 109 | setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num); | ||
| 110 | |||
| 111 | // The second and subsequent uniform registers receive the input vertices | ||
| 112 | buffer_cur = setup.uniforms.f + 1; | ||
| 113 | |||
| 114 | main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1; | ||
| 115 | total_vertex_num = val; | ||
| 116 | need_index = false; | ||
| 117 | } | ||
| 118 | |||
| 119 | bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||
| 120 | DEBUG_ASSERT(!need_index); | ||
| 121 | if (main_vertex_num != 0) { | ||
| 122 | // For main vertices, receive all attributes | ||
| 123 | buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||
| 124 | --main_vertex_num; | ||
| 125 | } else { | ||
| 126 | // For other vertices, only receive the first attribute (usually the position) | ||
| 127 | *(buffer_cur++) = input.attr[0]; | ||
| 128 | } | ||
| 129 | --total_vertex_num; | ||
| 130 | |||
| 131 | if (total_vertex_num == 0) { | ||
| 132 | need_index = true; | ||
| 133 | return true; | ||
| 134 | } | ||
| 135 | |||
| 136 | return false; | ||
| 137 | } | ||
| 138 | |||
| 139 | private: | ||
| 140 | bool need_index = true; | ||
| 141 | const Regs& regs; | ||
| 142 | Shader::ShaderSetup& setup; | ||
| 143 | unsigned int main_vertex_num; | ||
| 144 | unsigned int total_vertex_num; | ||
| 145 | Math::Vec4<float24>* buffer_cur; | ||
| 146 | unsigned int vs_output_num; | ||
| 147 | }; | ||
| 148 | |||
| 149 | // In FixedPrimitive mode, vertex attributes are buffered into the uniform registers in the geometry | ||
| 150 | // shader unit. The number of vertices per shader invocation is constant. This is usually used for | ||
| 151 | // particle systems. | ||
| 152 | class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend { | ||
| 153 | public: | ||
| 154 | GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup) | ||
| 155 | : regs(regs), setup(setup) { | ||
| 156 | ASSERT(regs.pipeline.variable_primitive == 0); | ||
| 157 | ASSERT(regs.gs.input_to_uniform == 1); | ||
| 158 | vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||
| 159 | ASSERT(vs_output_num == regs.pipeline.gs_config.stride_minus_1 + 1); | ||
| 160 | size_t vertex_num = regs.pipeline.gs_config.fixed_vertex_num_minus_1 + 1; | ||
| 161 | buffer_cur = buffer_begin = setup.uniforms.f + regs.pipeline.gs_config.start_index; | ||
| 162 | buffer_end = buffer_begin + vs_output_num * vertex_num; | ||
| 163 | } | ||
| 164 | |||
| 165 | bool IsEmpty() const override { | ||
| 166 | return buffer_cur == buffer_begin; | ||
| 167 | } | ||
| 168 | |||
| 169 | bool NeedIndexInput() const override { | ||
| 170 | return false; | ||
| 171 | } | ||
| 172 | |||
| 173 | void SubmitIndex(unsigned int val) override { | ||
| 174 | UNREACHABLE(); | ||
| 175 | } | ||
| 176 | |||
| 177 | bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||
| 178 | buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||
| 179 | if (buffer_cur == buffer_end) { | ||
| 180 | buffer_cur = buffer_begin; | ||
| 181 | return true; | ||
| 182 | } | ||
| 183 | return false; | ||
| 184 | } | ||
| 185 | |||
| 186 | private: | ||
| 187 | const Regs& regs; | ||
| 188 | Shader::ShaderSetup& setup; | ||
| 189 | Math::Vec4<float24>* buffer_begin; | ||
| 190 | Math::Vec4<float24>* buffer_cur; | ||
| 191 | Math::Vec4<float24>* buffer_end; | ||
| 192 | unsigned int vs_output_num; | ||
| 193 | }; | ||
| 194 | |||
| 195 | GeometryPipeline::GeometryPipeline(State& state) : state(state) {} | ||
| 196 | |||
| 197 | GeometryPipeline::~GeometryPipeline() = default; | ||
| 198 | |||
| 199 | void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) { | ||
| 200 | this->vertex_handler = vertex_handler; | ||
| 201 | } | ||
| 202 | |||
| 203 | void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) { | ||
| 204 | if (!backend) | ||
| 205 | return; | ||
| 206 | |||
| 207 | this->shader_engine = shader_engine; | ||
| 208 | shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset); | ||
| 209 | } | ||
| 210 | |||
| 211 | void GeometryPipeline::Reconfigure() { | ||
| 212 | ASSERT(!backend || backend->IsEmpty()); | ||
| 213 | |||
| 214 | if (state.regs.pipeline.use_gs == PipelineRegs::UseGS::No) { | ||
| 215 | backend = nullptr; | ||
| 216 | return; | ||
| 217 | } | ||
| 218 | |||
| 219 | ASSERT(state.regs.pipeline.use_gs == PipelineRegs::UseGS::Yes); | ||
| 220 | |||
| 221 | // The following assumes that when geometry shader is in use, the shader unit 3 is configured as | ||
| 222 | // a geometry shader unit. | ||
| 223 | // TODO: what happens if this is not true? | ||
| 224 | ASSERT(state.regs.pipeline.gs_unit_exclusive_configuration == 1); | ||
| 225 | ASSERT(state.regs.gs.shader_mode == ShaderRegs::ShaderMode::GS); | ||
| 226 | |||
| 227 | state.gs_unit.ConfigOutput(state.regs.gs); | ||
| 228 | |||
| 229 | ASSERT(state.regs.pipeline.vs_outmap_total_minus_1_a == | ||
| 230 | state.regs.pipeline.vs_outmap_total_minus_1_b); | ||
| 231 | |||
| 232 | switch (state.regs.pipeline.gs_config.mode) { | ||
| 233 | case PipelineRegs::GSMode::Point: | ||
| 234 | backend = std::make_unique<GeometryPipeline_Point>(state.regs, state.gs_unit); | ||
| 235 | break; | ||
| 236 | case PipelineRegs::GSMode::VariablePrimitive: | ||
| 237 | backend = std::make_unique<GeometryPipeline_VariablePrimitive>(state.regs, state.gs); | ||
| 238 | break; | ||
| 239 | case PipelineRegs::GSMode::FixedPrimitive: | ||
| 240 | backend = std::make_unique<GeometryPipeline_FixedPrimitive>(state.regs, state.gs); | ||
| 241 | break; | ||
| 242 | default: | ||
| 243 | UNREACHABLE(); | ||
| 244 | } | ||
| 245 | } | ||
| 246 | |||
| 247 | bool GeometryPipeline::NeedIndexInput() const { | ||
| 248 | if (!backend) | ||
| 249 | return false; | ||
| 250 | return backend->NeedIndexInput(); | ||
| 251 | } | ||
| 252 | |||
| 253 | void GeometryPipeline::SubmitIndex(unsigned int val) { | ||
| 254 | backend->SubmitIndex(val); | ||
| 255 | } | ||
| 256 | |||
| 257 | void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) { | ||
| 258 | if (!backend) { | ||
| 259 | // No backend means the geometry shader is disabled, so we send the vertex shader output | ||
| 260 | // directly to the primitive assembler. | ||
| 261 | vertex_handler(input); | ||
| 262 | } else { | ||
| 263 | if (backend->SubmitVertex(input)) { | ||
| 264 | shader_engine->Run(state.gs, state.gs_unit); | ||
| 265 | |||
| 266 | // The uniform b15 is set to true after every geometry shader invocation. This lets the | ||
| 267 | // shader detect whether it is in the first invocation of a batch, provided the program | ||
| 268 | // sets b15 to false beforehand. | ||
| 269 | state.gs.uniforms.b[15] = true; | ||
| 270 | } | ||
| 271 | } | ||
| 272 | } | ||
| 273 | |||
| 274 | } // namespace Pica | ||
diff --git a/src/video_core/geometry_pipeline.h b/src/video_core/geometry_pipeline.h new file mode 100644 index 000000000..91fdd3192 --- /dev/null +++ b/src/video_core/geometry_pipeline.h | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | // Copyright 2017 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include "video_core/shader/shader.h" | ||
| 9 | |||
| 10 | namespace Pica { | ||
| 11 | |||
| 12 | struct State; | ||
| 13 | |||
| 14 | class GeometryPipelineBackend; | ||
| 15 | |||
| 16 | /// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler | ||
| 17 | class GeometryPipeline { | ||
| 18 | public: | ||
| 19 | explicit GeometryPipeline(State& state); | ||
| 20 | ~GeometryPipeline(); | ||
| 21 | |||
| 22 | /// Sets the handler for receiving vertex outputs from vertex shader | ||
| 23 | void SetVertexHandler(Shader::VertexHandler vertex_handler); | ||
| 24 | |||
| 25 | /** | ||
| 26 | * Sets up the geometry shader unit if it is in use | ||
| 27 | * @param shader_engine the shader engine for the geometry shader to run | ||
| 28 | */ | ||
| 29 | void Setup(Shader::ShaderEngine* shader_engine); | ||
| 30 | |||
| 31 | /// Reconfigures the pipeline according to current register settings | ||
| 32 | void Reconfigure(); | ||
| 33 | |||
| 34 | /// Checks if the pipeline needs a direct input from index buffer | ||
| 35 | bool NeedIndexInput() const; | ||
| 36 | |||
| 37 | /// Submits an index from index buffer. Call this only when NeedIndexInput returns true | ||
| 38 | void SubmitIndex(unsigned int val); | ||
| 39 | |||
| 40 | /// Submits vertex attributes output from vertex shader | ||
| 41 | void SubmitVertex(const Shader::AttributeBuffer& input); | ||
| 42 | |||
| 43 | private: | ||
| 44 | Shader::VertexHandler vertex_handler; | ||
| 45 | Shader::ShaderEngine* shader_engine; | ||
| 46 | std::unique_ptr<GeometryPipelineBackend> backend; | ||
| 47 | State& state; | ||
| 48 | }; | ||
| 49 | } // namespace Pica | ||
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index b95148a6a..218e06883 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp | |||
| @@ -3,9 +3,11 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include "video_core/geometry_pipeline.h" | ||
| 6 | #include "video_core/pica.h" | 7 | #include "video_core/pica.h" |
| 7 | #include "video_core/pica_state.h" | 8 | #include "video_core/pica_state.h" |
| 8 | #include "video_core/regs_pipeline.h" | 9 | #include "video_core/renderer_base.h" |
| 10 | #include "video_core/video_core.h" | ||
| 9 | 11 | ||
| 10 | namespace Pica { | 12 | namespace Pica { |
| 11 | 13 | ||
| @@ -24,6 +26,23 @@ void Zero(T& o) { | |||
| 24 | memset(&o, 0, sizeof(o)); | 26 | memset(&o, 0, sizeof(o)); |
| 25 | } | 27 | } |
| 26 | 28 | ||
| 29 | State::State() : geometry_pipeline(*this) { | ||
| 30 | auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) { | ||
| 31 | using Pica::Shader::OutputVertex; | ||
| 32 | auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1, | ||
| 33 | const OutputVertex& v2) { | ||
| 34 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||
| 35 | }; | ||
| 36 | primitive_assembler.SubmitVertex( | ||
| 37 | Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle); | ||
| 38 | }; | ||
| 39 | |||
| 40 | auto SetWinding = [this]() { primitive_assembler.SetWinding(); }; | ||
| 41 | |||
| 42 | g_state.gs_unit.SetVertexHandler(SubmitVertex, SetWinding); | ||
| 43 | g_state.geometry_pipeline.SetVertexHandler(SubmitVertex); | ||
| 44 | } | ||
| 45 | |||
| 27 | void State::Reset() { | 46 | void State::Reset() { |
| 28 | Zero(regs); | 47 | Zero(regs); |
| 29 | Zero(vs); | 48 | Zero(vs); |
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 864a2c9e6..c6634a0bc 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/vector_math.h" | 10 | #include "common/vector_math.h" |
| 11 | #include "video_core/geometry_pipeline.h" | ||
| 11 | #include "video_core/primitive_assembly.h" | 12 | #include "video_core/primitive_assembly.h" |
| 12 | #include "video_core/regs.h" | 13 | #include "video_core/regs.h" |
| 13 | #include "video_core/shader/shader.h" | 14 | #include "video_core/shader/shader.h" |
| @@ -16,6 +17,7 @@ namespace Pica { | |||
| 16 | 17 | ||
| 17 | /// Struct used to describe current Pica state | 18 | /// Struct used to describe current Pica state |
| 18 | struct State { | 19 | struct State { |
| 20 | State(); | ||
| 19 | void Reset(); | 21 | void Reset(); |
| 20 | 22 | ||
| 21 | /// Pica registers | 23 | /// Pica registers |
| @@ -137,8 +139,17 @@ struct State { | |||
| 137 | Shader::AttributeBuffer input_vertex; | 139 | Shader::AttributeBuffer input_vertex; |
| 138 | // Index of the next attribute to be loaded into `input_vertex`. | 140 | // Index of the next attribute to be loaded into `input_vertex`. |
| 139 | u32 current_attribute = 0; | 141 | u32 current_attribute = 0; |
| 142 | // Indicates that immediate mode just started and the geometry pipeline must be reconfigured | ||
| 143 | bool reset_geometry_pipeline = true; | ||
| 140 | } immediate; | 144 | } immediate; |
| 141 | 145 | ||
| 146 | // The geometry shader needs to be kept in the global state because some shaders rely on | ||
| 147 | // preserved register values across shader invocations. | ||
| 148 | // TODO: also bring the three vertex shader units here and implement the shader scheduler. | ||
| 149 | Shader::GSUnitState gs_unit; | ||
| 150 | |||
| 151 | GeometryPipeline geometry_pipeline; | ||
| 152 | |||
| 142 | // This is constructed with a dummy triangle topology | 153 | // This is constructed with a dummy triangle topology |
| 143 | PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; | 154 | PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; |
| 144 | }; | 155 | }; |
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index acd2ac5e2..9c3dd4cab 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp | |||
| @@ -17,15 +17,18 @@ template <typename VertexType> | |||
| 17 | void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, | 17 | void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, |
| 18 | TriangleHandler triangle_handler) { | 18 | TriangleHandler triangle_handler) { |
| 19 | switch (topology) { | 19 | switch (topology) { |
| 20 | // TODO: Figure out what's different with TriangleTopology::Shader. | ||
| 21 | case PipelineRegs::TriangleTopology::List: | 20 | case PipelineRegs::TriangleTopology::List: |
| 22 | case PipelineRegs::TriangleTopology::Shader: | 21 | case PipelineRegs::TriangleTopology::Shader: |
| 23 | if (buffer_index < 2) { | 22 | if (buffer_index < 2) { |
| 24 | buffer[buffer_index++] = vtx; | 23 | buffer[buffer_index++] = vtx; |
| 25 | } else { | 24 | } else { |
| 26 | buffer_index = 0; | 25 | buffer_index = 0; |
| 27 | 26 | if (topology == PipelineRegs::TriangleTopology::Shader && winding) { | |
| 28 | triangle_handler(buffer[0], buffer[1], vtx); | 27 | triangle_handler(buffer[1], buffer[0], vtx); |
| 28 | winding = false; | ||
| 29 | } else { | ||
| 30 | triangle_handler(buffer[0], buffer[1], vtx); | ||
| 31 | } | ||
| 29 | } | 32 | } |
| 30 | break; | 33 | break; |
| 31 | 34 | ||
| @@ -51,9 +54,15 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, | |||
| 51 | } | 54 | } |
| 52 | 55 | ||
| 53 | template <typename VertexType> | 56 | template <typename VertexType> |
| 57 | void PrimitiveAssembler<VertexType>::SetWinding() { | ||
| 58 | winding = true; | ||
| 59 | } | ||
| 60 | |||
| 61 | template <typename VertexType> | ||
| 54 | void PrimitiveAssembler<VertexType>::Reset() { | 62 | void PrimitiveAssembler<VertexType>::Reset() { |
| 55 | buffer_index = 0; | 63 | buffer_index = 0; |
| 56 | strip_ready = false; | 64 | strip_ready = false; |
| 65 | winding = false; | ||
| 57 | } | 66 | } |
| 58 | 67 | ||
| 59 | template <typename VertexType> | 68 | template <typename VertexType> |
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index e8eccdf27..12de8e3b9 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h | |||
| @@ -30,6 +30,12 @@ struct PrimitiveAssembler { | |||
| 30 | void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); | 30 | void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); |
| 31 | 31 | ||
| 32 | /** | 32 | /** |
| 33 | * Invert the vertex order of the next triangle. Called by geometry shader emitter. | ||
| 34 | * This only takes effect for TriangleTopology::Shader. | ||
| 35 | */ | ||
| 36 | void SetWinding(); | ||
| 37 | |||
| 38 | /** | ||
| 33 | * Resets the internal state of the PrimitiveAssembler. | 39 | * Resets the internal state of the PrimitiveAssembler. |
| 34 | */ | 40 | */ |
| 35 | void Reset(); | 41 | void Reset(); |
| @@ -45,6 +51,7 @@ private: | |||
| 45 | int buffer_index; | 51 | int buffer_index; |
| 46 | VertexType buffer[2]; | 52 | VertexType buffer[2]; |
| 47 | bool strip_ready = false; | 53 | bool strip_ready = false; |
| 54 | bool winding = false; | ||
| 48 | }; | 55 | }; |
| 49 | 56 | ||
| 50 | } // namespace | 57 | } // namespace |
diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h index 8b6369297..e78c3e331 100644 --- a/src/video_core/regs_pipeline.h +++ b/src/video_core/regs_pipeline.h | |||
| @@ -147,7 +147,15 @@ struct PipelineRegs { | |||
| 147 | // Number of vertices to render | 147 | // Number of vertices to render |
| 148 | u32 num_vertices; | 148 | u32 num_vertices; |
| 149 | 149 | ||
| 150 | INSERT_PADDING_WORDS(0x1); | 150 | enum class UseGS : u32 { |
| 151 | No = 0, | ||
| 152 | Yes = 2, | ||
| 153 | }; | ||
| 154 | |||
| 155 | union { | ||
| 156 | BitField<0, 2, UseGS> use_gs; | ||
| 157 | BitField<31, 1, u32> variable_primitive; | ||
| 158 | }; | ||
| 151 | 159 | ||
| 152 | // The index of the first vertex to render | 160 | // The index of the first vertex to render |
| 153 | u32 vertex_offset; | 161 | u32 vertex_offset; |
| @@ -218,7 +226,29 @@ struct PipelineRegs { | |||
| 218 | 226 | ||
| 219 | GPUMode gpu_mode; | 227 | GPUMode gpu_mode; |
| 220 | 228 | ||
| 221 | INSERT_PADDING_WORDS(0x18); | 229 | INSERT_PADDING_WORDS(0x4); |
| 230 | BitField<0, 4, u32> vs_outmap_total_minus_1_a; | ||
| 231 | INSERT_PADDING_WORDS(0x6); | ||
| 232 | BitField<0, 4, u32> vs_outmap_total_minus_1_b; | ||
| 233 | |||
| 234 | enum class GSMode : u32 { | ||
| 235 | Point = 0, | ||
| 236 | VariablePrimitive = 1, | ||
| 237 | FixedPrimitive = 2, | ||
| 238 | }; | ||
| 239 | |||
| 240 | union { | ||
| 241 | BitField<0, 8, GSMode> mode; | ||
| 242 | BitField<8, 4, u32> fixed_vertex_num_minus_1; | ||
| 243 | BitField<12, 4, u32> stride_minus_1; | ||
| 244 | BitField<16, 4, u32> start_index; | ||
| 245 | } gs_config; | ||
| 246 | |||
| 247 | INSERT_PADDING_WORDS(0x1); | ||
| 248 | |||
| 249 | u32 variable_vertex_main_num_minus_1; | ||
| 250 | |||
| 251 | INSERT_PADDING_WORDS(0x9); | ||
| 222 | 252 | ||
| 223 | enum class TriangleTopology : u32 { | 253 | enum class TriangleTopology : u32 { |
| 224 | List = 0, | 254 | List = 0, |
diff --git a/src/video_core/regs_shader.h b/src/video_core/regs_shader.h index ddb1ee451..c15d4d162 100644 --- a/src/video_core/regs_shader.h +++ b/src/video_core/regs_shader.h | |||
| @@ -24,9 +24,16 @@ struct ShaderRegs { | |||
| 24 | 24 | ||
| 25 | INSERT_PADDING_WORDS(0x4); | 25 | INSERT_PADDING_WORDS(0x4); |
| 26 | 26 | ||
| 27 | enum ShaderMode { | ||
| 28 | GS = 0x08, | ||
| 29 | VS = 0xA0, | ||
| 30 | }; | ||
| 31 | |||
| 27 | union { | 32 | union { |
| 28 | // Number of input attributes to shader unit - 1 | 33 | // Number of input attributes to shader unit - 1 |
| 29 | BitField<0, 4, u32> max_input_attribute_index; | 34 | BitField<0, 4, u32> max_input_attribute_index; |
| 35 | BitField<8, 8, u32> input_to_uniform; | ||
| 36 | BitField<24, 8, ShaderMode> shader_mode; | ||
| 30 | }; | 37 | }; |
| 31 | 38 | ||
| 32 | // Offset to shader program entry point (in words) | 39 | // Offset to shader program entry point (in words) |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 67ed19ba8..e9063e616 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -21,7 +21,8 @@ namespace Pica { | |||
| 21 | 21 | ||
| 22 | namespace Shader { | 22 | namespace Shader { |
| 23 | 23 | ||
| 24 | OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) { | 24 | OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, |
| 25 | const AttributeBuffer& input) { | ||
| 25 | // Setup output data | 26 | // Setup output data |
| 26 | union { | 27 | union { |
| 27 | OutputVertex ret{}; | 28 | OutputVertex ret{}; |
| @@ -82,6 +83,44 @@ void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) { | |||
| 82 | } | 83 | } |
| 83 | } | 84 | } |
| 84 | 85 | ||
| 86 | UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {} | ||
| 87 | |||
| 88 | GSEmitter::GSEmitter() { | ||
| 89 | handlers = new Handlers; | ||
| 90 | } | ||
| 91 | |||
| 92 | GSEmitter::~GSEmitter() { | ||
| 93 | delete handlers; | ||
| 94 | } | ||
| 95 | |||
| 96 | void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) { | ||
| 97 | ASSERT(vertex_id < 3); | ||
| 98 | std::copy(std::begin(vertex), std::end(vertex), buffer[vertex_id].begin()); | ||
| 99 | if (prim_emit) { | ||
| 100 | if (winding) | ||
| 101 | handlers->winding_setter(); | ||
| 102 | for (size_t i = 0; i < buffer.size(); ++i) { | ||
| 103 | AttributeBuffer output; | ||
| 104 | unsigned int output_i = 0; | ||
| 105 | for (unsigned int reg : Common::BitSet<u32>(output_mask)) { | ||
| 106 | output.attr[output_i++] = buffer[i][reg]; | ||
| 107 | } | ||
| 108 | handlers->vertex_handler(output); | ||
| 109 | } | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | GSUnitState::GSUnitState() : UnitState(&emitter) {} | ||
| 114 | |||
| 115 | void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) { | ||
| 116 | emitter.handlers->vertex_handler = std::move(vertex_handler); | ||
| 117 | emitter.handlers->winding_setter = std::move(winding_setter); | ||
| 118 | } | ||
| 119 | |||
| 120 | void GSUnitState::ConfigOutput(const ShaderRegs& config) { | ||
| 121 | emitter.output_mask = config.output_mask; | ||
| 122 | } | ||
| 123 | |||
| 85 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | 124 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); |
| 86 | 125 | ||
| 87 | #ifdef ARCHITECTURE_x86_64 | 126 | #ifdef ARCHITECTURE_x86_64 |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index e156f6aef..a3789da01 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <functional> | ||
| 9 | #include <type_traits> | 10 | #include <type_traits> |
| 10 | #include <nihstro/shader_bytecode.h> | 11 | #include <nihstro/shader_bytecode.h> |
| 11 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| @@ -31,6 +32,12 @@ struct AttributeBuffer { | |||
| 31 | alignas(16) Math::Vec4<float24> attr[16]; | 32 | alignas(16) Math::Vec4<float24> attr[16]; |
| 32 | }; | 33 | }; |
| 33 | 34 | ||
| 35 | /// Handler type for receiving vertex outputs from vertex shader or geometry shader | ||
| 36 | using VertexHandler = std::function<void(const AttributeBuffer&)>; | ||
| 37 | |||
| 38 | /// Handler type for signaling to invert the vertex order of the next triangle | ||
| 39 | using WindingSetter = std::function<void()>; | ||
| 40 | |||
| 34 | struct OutputVertex { | 41 | struct OutputVertex { |
| 35 | Math::Vec4<float24> pos; | 42 | Math::Vec4<float24> pos; |
| 36 | Math::Vec4<float24> quat; | 43 | Math::Vec4<float24> quat; |
| @@ -43,7 +50,8 @@ struct OutputVertex { | |||
| 43 | INSERT_PADDING_WORDS(1); | 50 | INSERT_PADDING_WORDS(1); |
| 44 | Math::Vec2<float24> tc2; | 51 | Math::Vec2<float24> tc2; |
| 45 | 52 | ||
| 46 | static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output); | 53 | static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, |
| 54 | const AttributeBuffer& output); | ||
| 47 | }; | 55 | }; |
| 48 | #define ASSERT_POS(var, pos) \ | 56 | #define ASSERT_POS(var, pos) \ |
| 49 | static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ | 57 | static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ |
| @@ -61,12 +69,36 @@ static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | |||
| 61 | static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); | 69 | static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); |
| 62 | 70 | ||
| 63 | /** | 71 | /** |
| 72 | * This structure contains state information for primitive emitting in geometry shader. | ||
| 73 | */ | ||
| 74 | struct GSEmitter { | ||
| 75 | std::array<std::array<Math::Vec4<float24>, 16>, 3> buffer; | ||
| 76 | u8 vertex_id; | ||
| 77 | bool prim_emit; | ||
| 78 | bool winding; | ||
| 79 | u32 output_mask; | ||
| 80 | |||
| 81 | // Function objects are hidden behind a raw pointer to make the structure standard layout type, | ||
| 82 | // for JIT to use offsetof to access other members. | ||
| 83 | struct Handlers { | ||
| 84 | VertexHandler vertex_handler; | ||
| 85 | WindingSetter winding_setter; | ||
| 86 | } * handlers; | ||
| 87 | |||
| 88 | GSEmitter(); | ||
| 89 | ~GSEmitter(); | ||
| 90 | void Emit(Math::Vec4<float24> (&vertex)[16]); | ||
| 91 | }; | ||
| 92 | static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type"); | ||
| 93 | |||
| 94 | /** | ||
| 64 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 95 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| 65 | * has four shader units that process shaders in parallel. At the present, Citra only implements a | 96 | * has four shader units that process shaders in parallel. At the present, Citra only implements a |
| 66 | * single shader unit that processes all shaders serially. Putting the state information in a struct | 97 | * single shader unit that processes all shaders serially. Putting the state information in a struct |
| 67 | * here will make it easier for us to parallelize the shader processing later. | 98 | * here will make it easier for us to parallelize the shader processing later. |
| 68 | */ | 99 | */ |
| 69 | struct UnitState { | 100 | struct UnitState { |
| 101 | explicit UnitState(GSEmitter* emitter = nullptr); | ||
| 70 | struct Registers { | 102 | struct Registers { |
| 71 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore | 103 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore |
| 72 | // required to be 16-byte aligned. | 104 | // required to be 16-byte aligned. |
| @@ -82,6 +114,8 @@ struct UnitState { | |||
| 82 | // TODO: How many bits do these actually have? | 114 | // TODO: How many bits do these actually have? |
| 83 | s32 address_registers[3]; | 115 | s32 address_registers[3]; |
| 84 | 116 | ||
| 117 | GSEmitter* emitter_ptr; | ||
| 118 | |||
| 85 | static size_t InputOffset(const SourceRegister& reg) { | 119 | static size_t InputOffset(const SourceRegister& reg) { |
| 86 | switch (reg.GetRegisterType()) { | 120 | switch (reg.GetRegisterType()) { |
| 87 | case RegisterType::Input: | 121 | case RegisterType::Input: |
| @@ -125,6 +159,19 @@ struct UnitState { | |||
| 125 | void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); | 159 | void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); |
| 126 | }; | 160 | }; |
| 127 | 161 | ||
| 162 | /** | ||
| 163 | * This is an extended shader unit state that represents the special unit that can run both vertex | ||
| 164 | * shader and geometry shader. It contains an additional primitive emitter and utilities for | ||
| 165 | * geometry shader. | ||
| 166 | */ | ||
| 167 | struct GSUnitState : public UnitState { | ||
| 168 | GSUnitState(); | ||
| 169 | void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter); | ||
| 170 | void ConfigOutput(const ShaderRegs& config); | ||
| 171 | |||
| 172 | GSEmitter emitter; | ||
| 173 | }; | ||
| 174 | |||
| 128 | struct ShaderSetup { | 175 | struct ShaderSetup { |
| 129 | struct { | 176 | struct { |
| 130 | // The float uniforms are accessed by the shader JIT using SSE instructions, and are | 177 | // The float uniforms are accessed by the shader JIT using SSE instructions, and are |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 206c0978a..9d4da4904 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -636,6 +636,22 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | |||
| 636 | break; | 636 | break; |
| 637 | } | 637 | } |
| 638 | 638 | ||
| 639 | case OpCode::Id::EMIT: { | ||
| 640 | GSEmitter* emitter = state.emitter_ptr; | ||
| 641 | ASSERT_MSG(emitter, "Execute EMIT on VS"); | ||
| 642 | emitter->Emit(state.registers.output); | ||
| 643 | break; | ||
| 644 | } | ||
| 645 | |||
| 646 | case OpCode::Id::SETEMIT: { | ||
| 647 | GSEmitter* emitter = state.emitter_ptr; | ||
| 648 | ASSERT_MSG(emitter, "Execute SETEMIT on VS"); | ||
| 649 | emitter->vertex_id = instr.setemit.vertex_id; | ||
| 650 | emitter->prim_emit = instr.setemit.prim_emit != 0; | ||
| 651 | emitter->winding = instr.setemit.winding != 0; | ||
| 652 | break; | ||
| 653 | } | ||
| 654 | |||
| 639 | default: | 655 | default: |
| 640 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | 656 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", |
| 641 | (int)instr.opcode.Value().EffectiveOpCode(), | 657 | (int)instr.opcode.Value().EffectiveOpCode(), |
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 42a57aab1..1b31623bd 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp | |||
| @@ -75,8 +75,8 @@ const JitFunction instr_table[64] = { | |||
| 75 | &JitShader::Compile_IF, // ifu | 75 | &JitShader::Compile_IF, // ifu |
| 76 | &JitShader::Compile_IF, // ifc | 76 | &JitShader::Compile_IF, // ifc |
| 77 | &JitShader::Compile_LOOP, // loop | 77 | &JitShader::Compile_LOOP, // loop |
| 78 | nullptr, // emit | 78 | &JitShader::Compile_EMIT, // emit |
| 79 | nullptr, // sete | 79 | &JitShader::Compile_SETE, // sete |
| 80 | &JitShader::Compile_JMP, // jmpc | 80 | &JitShader::Compile_JMP, // jmpc |
| 81 | &JitShader::Compile_JMP, // jmpu | 81 | &JitShader::Compile_JMP, // jmpu |
| 82 | &JitShader::Compile_CMP, // cmp | 82 | &JitShader::Compile_CMP, // cmp |
| @@ -772,6 +772,51 @@ void JitShader::Compile_JMP(Instruction instr) { | |||
| 772 | } | 772 | } |
| 773 | } | 773 | } |
| 774 | 774 | ||
| 775 | static void Emit(GSEmitter* emitter, Math::Vec4<float24> (*output)[16]) { | ||
| 776 | emitter->Emit(*output); | ||
| 777 | } | ||
| 778 | |||
| 779 | void JitShader::Compile_EMIT(Instruction instr) { | ||
| 780 | Label have_emitter, end; | ||
| 781 | mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); | ||
| 782 | test(rax, rax); | ||
| 783 | jnz(have_emitter); | ||
| 784 | |||
| 785 | ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 786 | mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute EMIT on VS")); | ||
| 787 | CallFarFunction(*this, LogCritical); | ||
| 788 | ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 789 | jmp(end); | ||
| 790 | |||
| 791 | L(have_emitter); | ||
| 792 | ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 793 | mov(ABI_PARAM1, rax); | ||
| 794 | mov(ABI_PARAM2, STATE); | ||
| 795 | add(ABI_PARAM2, static_cast<Xbyak::uint32>(offsetof(UnitState, registers.output))); | ||
| 796 | CallFarFunction(*this, Emit); | ||
| 797 | ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 798 | L(end); | ||
| 799 | } | ||
| 800 | |||
| 801 | void JitShader::Compile_SETE(Instruction instr) { | ||
| 802 | Label have_emitter, end; | ||
| 803 | mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); | ||
| 804 | test(rax, rax); | ||
| 805 | jnz(have_emitter); | ||
| 806 | |||
| 807 | ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 808 | mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute SETEMIT on VS")); | ||
| 809 | CallFarFunction(*this, LogCritical); | ||
| 810 | ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 811 | jmp(end); | ||
| 812 | |||
| 813 | L(have_emitter); | ||
| 814 | mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id); | ||
| 815 | mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit); | ||
| 816 | mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding); | ||
| 817 | L(end); | ||
| 818 | } | ||
| 819 | |||
| 775 | void JitShader::Compile_Block(unsigned end) { | 820 | void JitShader::Compile_Block(unsigned end) { |
| 776 | while (program_counter < end) { | 821 | while (program_counter < end) { |
| 777 | Compile_NextInstr(); | 822 | Compile_NextInstr(); |
diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index 31af0ca48..4aee56b1d 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h | |||
| @@ -66,6 +66,8 @@ public: | |||
| 66 | void Compile_JMP(Instruction instr); | 66 | void Compile_JMP(Instruction instr); |
| 67 | void Compile_CMP(Instruction instr); | 67 | void Compile_CMP(Instruction instr); |
| 68 | void Compile_MAD(Instruction instr); | 68 | void Compile_MAD(Instruction instr); |
| 69 | void Compile_EMIT(Instruction instr); | ||
| 70 | void Compile_SETE(Instruction instr); | ||
| 69 | 71 | ||
| 70 | private: | 72 | private: |
| 71 | void Compile_Block(unsigned end); | 73 | void Compile_Block(unsigned end); |