summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar bunnei2017-10-04 11:40:29 -0400
committerGravatar GitHub2017-10-04 11:40:29 -0400
commitc1355ada4be4909d68db345d8c00b0dd48376538 (patch)
tree9ee44a0381366fb635b695f05b4b19a0be8113f7 /src
parentMerge pull request #2977 from Subv/shmem_create (diff)
parentExtracted the attribute setup and draw commands into their own functions (diff)
downloadyuzu-c1355ada4be4909d68db345d8c00b0dd48376538.tar.gz
yuzu-c1355ada4be4909d68db345d8c00b0dd48376538.tar.xz
yuzu-c1355ada4be4909d68db345d8c00b0dd48376538.zip
Merge pull request #2985 from huwpascoe/pica_reg
Extracted the attribute setup and draw commands into their own functions
Diffstat (limited to 'src')
-rw-r--r--src/video_core/command_processor.cpp439
1 files changed, 222 insertions, 217 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 3ab4af374..caf9f7a06 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -119,6 +119,224 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup,
119 } 119 }
120} 120}
121 121
122static void LoadDefaultVertexAttributes(u32 register_value) {
123 auto& regs = g_state.regs;
124
125 // TODO: Does actual hardware indeed keep an intermediate buffer or does
126 // it directly write the values?
127 default_attr_write_buffer[default_attr_counter++] = register_value;
128
129 // Default attributes are written in a packed format such that four float24 values are encoded
130 // in three 32-bit numbers.
131 // We write to internal memory once a full such vector is written.
132 if (default_attr_counter >= 3) {
133 default_attr_counter = 0;
134
135 auto& setup = regs.pipeline.vs_default_attributes_setup;
136
137 if (setup.index >= 16) {
138 LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
139 return;
140 }
141
142 Math::Vec4<float24> attribute;
143
144 // NOTE: The destination component order indeed is "backwards"
145 attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
146 attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
147 ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
148 attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
149 ((default_attr_write_buffer[2] >> 24) & 0xFF));
150 attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
151
152 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
153 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
154 attribute.w.ToFloat32());
155
156 // TODO: Verify that this actually modifies the register!
157 if (setup.index < 15) {
158 g_state.input_default_attributes.attr[setup.index] = attribute;
159 setup.index++;
160 } else {
161 // Put each attribute into an immediate input buffer. When all specified immediate
162 // attributes are present, the Vertex Shader is invoked and everything is sent to
163 // the primitive assembler.
164
165 auto& immediate_input = g_state.immediate.input_vertex;
166 auto& immediate_attribute_id = g_state.immediate.current_attribute;
167
168 immediate_input.attr[immediate_attribute_id] = attribute;
169
170 if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) {
171 immediate_attribute_id += 1;
172 } else {
173 MICROPROFILE_SCOPE(GPU_Drawing);
174 immediate_attribute_id = 0;
175
176 auto* shader_engine = Shader::GetEngine();
177 shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
178
179 // Send to vertex shader
180 if (g_debug_context)
181 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
182 static_cast<void*>(&immediate_input));
183 Shader::UnitState shader_unit;
184 Shader::AttributeBuffer output{};
185
186 shader_unit.LoadInput(regs.vs, immediate_input);
187 shader_engine->Run(g_state.vs, shader_unit);
188 shader_unit.WriteOutput(regs.vs, output);
189
190 // Send to geometry pipeline
191 if (g_state.immediate.reset_geometry_pipeline) {
192 g_state.geometry_pipeline.Reconfigure();
193 g_state.immediate.reset_geometry_pipeline = false;
194 }
195 ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
196 g_state.geometry_pipeline.Setup(shader_engine);
197 g_state.geometry_pipeline.SubmitVertex(output);
198
199 // TODO: If drawing after every immediate mode triangle kills performance,
200 // change it to flush triangles whenever a drawing config register changes
201 // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550
202 VideoCore::g_renderer->Rasterizer()->DrawTriangles();
203 if (g_debug_context) {
204 g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
205 }
206 }
207 }
208 }
209}
210
211static void Draw(u32 command_id) {
212 MICROPROFILE_SCOPE(GPU_Drawing);
213 auto& regs = g_state.regs;
214
215#if PICA_LOG_TEV
216 DebugUtils::DumpTevStageConfig(regs.GetTevStages());
217#endif
218 if (g_debug_context)
219 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
220
221 // Processes information about internal vertex attributes to figure out how a vertex is
222 // loaded.
223 // Later, these can be compiled and cached.
224 const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
225 VertexLoader loader(regs.pipeline);
226
227 // Load vertices
228 bool is_indexed = (command_id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
229
230 const auto& index_info = regs.pipeline.index_array;
231 const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
232 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
233 bool index_u16 = index_info.format != 0;
234
235 PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler;
236
237 if (g_debug_context && g_debug_context->recorder) {
238 for (int i = 0; i < 3; ++i) {
239 const auto texture = regs.texturing.GetTextures()[i];
240 if (!texture.enabled)
241 continue;
242
243 u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
244 g_debug_context->recorder->MemoryAccessed(
245 texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) *
246 texture.config.width / 2 * texture.config.height,
247 texture.config.GetPhysicalAddress());
248 }
249 }
250
251 DebugUtils::MemoryAccessTracker memory_accesses;
252
253 // Simple circular-replacement vertex cache
254 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
255 const size_t VERTEX_CACHE_SIZE = 32;
256 std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
257 std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
258 Shader::AttributeBuffer vs_output;
259
260 unsigned int vertex_cache_pos = 0;
261 vertex_cache_ids.fill(-1);
262
263 auto* shader_engine = Shader::GetEngine();
264 Shader::UnitState shader_unit;
265
266 shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
267
268 g_state.geometry_pipeline.Reconfigure();
269 g_state.geometry_pipeline.Setup(shader_engine);
270 if (g_state.geometry_pipeline.NeedIndexInput())
271 ASSERT(is_indexed);
272
273 for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
274 // Indexed rendering doesn't use the start offset
275 unsigned int vertex = is_indexed
276 ? (index_u16 ? index_address_16[index] : index_address_8[index])
277 : (index + regs.pipeline.vertex_offset);
278
279 // -1 is a common special value used for primitive restart. Since it's unknown if
280 // the PICA supports it, and it would mess up the caching, guard against it here.
281 ASSERT(vertex != -1);
282
283 bool vertex_cache_hit = false;
284
285 if (is_indexed) {
286 if (g_state.geometry_pipeline.NeedIndexInput()) {
287 g_state.geometry_pipeline.SubmitIndex(vertex);
288 continue;
289 }
290
291 if (g_debug_context && Pica::g_debug_context->recorder) {
292 int size = index_u16 ? 2 : 1;
293 memory_accesses.AddAccess(base_address + index_info.offset + size * index, size);
294 }
295
296 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
297 if (vertex == vertex_cache_ids[i]) {
298 vs_output = vertex_cache[i];
299 vertex_cache_hit = true;
300 break;
301 }
302 }
303 }
304
305 if (!vertex_cache_hit) {
306 // Initialize data for the current vertex
307 Shader::AttributeBuffer input;
308 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
309
310 // Send to vertex shader
311 if (g_debug_context)
312 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
313 (void*)&input);
314 shader_unit.LoadInput(regs.vs, input);
315 shader_engine->Run(g_state.vs, shader_unit);
316 shader_unit.WriteOutput(regs.vs, vs_output);
317
318 if (is_indexed) {
319 vertex_cache[vertex_cache_pos] = vs_output;
320 vertex_cache_ids[vertex_cache_pos] = vertex;
321 vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
322 }
323 }
324
325 // Send to geometry pipeline
326 g_state.geometry_pipeline.SubmitVertex(vs_output);
327 }
328
329 for (auto& range : memory_accesses.ranges) {
330 g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
331 range.second, range.first);
332 }
333
334 VideoCore::g_renderer->Rasterizer()->DrawTriangles();
335 if (g_debug_context) {
336 g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
337 }
338}
339
122static void WritePicaReg(u32 id, u32 value, u32 mask) { 340static void WritePicaReg(u32 id, u32 value, u32 mask) {
123 auto& regs = g_state.regs; 341 auto& regs = g_state.regs;
124 342
@@ -168,95 +386,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
168 // Load default vertex input attributes 386 // Load default vertex input attributes
169 case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233): 387 case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233):
170 case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234): 388 case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234):
171 case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): { 389 case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235):
172 // TODO: Does actual hardware indeed keep an intermediate buffer or does 390 LoadDefaultVertexAttributes(value);
173 // it directly write the values?
174 default_attr_write_buffer[default_attr_counter++] = value;
175
176 // Default attributes are written in a packed format such that four float24 values are
177 // encoded in
178 // three 32-bit numbers. We write to internal memory once a full such vector is
179 // written.
180 if (default_attr_counter >= 3) {
181 default_attr_counter = 0;
182
183 auto& setup = regs.pipeline.vs_default_attributes_setup;
184
185 if (setup.index >= 16) {
186 LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
187 break;
188 }
189
190 Math::Vec4<float24> attribute;
191
192 // NOTE: The destination component order indeed is "backwards"
193 attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
194 attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
195 ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
196 attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
197 ((default_attr_write_buffer[2] >> 24) & 0xFF));
198 attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
199
200 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
201 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
202 attribute.w.ToFloat32());
203
204 // TODO: Verify that this actually modifies the register!
205 if (setup.index < 15) {
206 g_state.input_default_attributes.attr[setup.index] = attribute;
207 setup.index++;
208 } else {
209 // Put each attribute into an immediate input buffer. When all specified immediate
210 // attributes are present, the Vertex Shader is invoked and everything is sent to
211 // the primitive assembler.
212
213 auto& immediate_input = g_state.immediate.input_vertex;
214 auto& immediate_attribute_id = g_state.immediate.current_attribute;
215
216 immediate_input.attr[immediate_attribute_id] = attribute;
217
218 if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) {
219 immediate_attribute_id += 1;
220 } else {
221 MICROPROFILE_SCOPE(GPU_Drawing);
222 immediate_attribute_id = 0;
223
224 auto* shader_engine = Shader::GetEngine();
225 shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
226
227 // Send to vertex shader
228 if (g_debug_context)
229 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
230 static_cast<void*>(&immediate_input));
231 Shader::UnitState shader_unit;
232 Shader::AttributeBuffer output{};
233
234 shader_unit.LoadInput(regs.vs, immediate_input);
235 shader_engine->Run(g_state.vs, shader_unit);
236 shader_unit.WriteOutput(regs.vs, output);
237
238 // Send to geometry pipeline
239 if (g_state.immediate.reset_geometry_pipeline) {
240 g_state.geometry_pipeline.Reconfigure();
241 g_state.immediate.reset_geometry_pipeline = false;
242 }
243 ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
244 g_state.geometry_pipeline.Setup(shader_engine);
245 g_state.geometry_pipeline.SubmitVertex(output);
246
247 // TODO: If drawing after every immediate mode triangle kills performance,
248 // change it to flush triangles whenever a drawing config register changes
249 // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550
250 VideoCore::g_renderer->Rasterizer()->DrawTriangles();
251 if (g_debug_context) {
252 g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch,
253 nullptr);
254 }
255 }
256 }
257 }
258 break; 391 break;
259 }
260 392
261 case PICA_REG_INDEX(pipeline.gpu_mode): 393 case PICA_REG_INDEX(pipeline.gpu_mode):
262 // This register likely just enables vertex processing and doesn't need any special handling 394 // This register likely just enables vertex processing and doesn't need any special handling
@@ -275,136 +407,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
275 407
276 // It seems like these trigger vertex rendering 408 // It seems like these trigger vertex rendering
277 case PICA_REG_INDEX(pipeline.trigger_draw): 409 case PICA_REG_INDEX(pipeline.trigger_draw):
278 case PICA_REG_INDEX(pipeline.trigger_draw_indexed): { 410 case PICA_REG_INDEX(pipeline.trigger_draw_indexed):
279 MICROPROFILE_SCOPE(GPU_Drawing); 411 Draw(id);
280
281#if PICA_LOG_TEV
282 DebugUtils::DumpTevStageConfig(regs.GetTevStages());
283#endif
284 if (g_debug_context)
285 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
286
287 // Processes information about internal vertex attributes to figure out how a vertex is
288 // loaded.
289 // Later, these can be compiled and cached.
290 const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
291 VertexLoader loader(regs.pipeline);
292
293 // Load vertices
294 bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
295
296 const auto& index_info = regs.pipeline.index_array;
297 const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
298 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
299 bool index_u16 = index_info.format != 0;
300
301 PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler;
302
303 if (g_debug_context && g_debug_context->recorder) {
304 for (int i = 0; i < 3; ++i) {
305 const auto texture = regs.texturing.GetTextures()[i];
306 if (!texture.enabled)
307 continue;
308
309 u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
310 g_debug_context->recorder->MemoryAccessed(
311 texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) *
312 texture.config.width / 2 * texture.config.height,
313 texture.config.GetPhysicalAddress());
314 }
315 }
316
317 DebugUtils::MemoryAccessTracker memory_accesses;
318
319 // Simple circular-replacement vertex cache
320 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
321 const size_t VERTEX_CACHE_SIZE = 32;
322 std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
323 std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
324 Shader::AttributeBuffer vs_output;
325
326 unsigned int vertex_cache_pos = 0;
327 vertex_cache_ids.fill(-1);
328
329 auto* shader_engine = Shader::GetEngine();
330 Shader::UnitState shader_unit;
331
332 shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
333
334 g_state.geometry_pipeline.Reconfigure();
335 g_state.geometry_pipeline.Setup(shader_engine);
336 if (g_state.geometry_pipeline.NeedIndexInput())
337 ASSERT(is_indexed);
338
339 for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
340 // Indexed rendering doesn't use the start offset
341 unsigned int vertex =
342 is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index])
343 : (index + regs.pipeline.vertex_offset);
344
345 // -1 is a common special value used for primitive restart. Since it's unknown if
346 // the PICA supports it, and it would mess up the caching, guard against it here.
347 ASSERT(vertex != -1);
348
349 bool vertex_cache_hit = false;
350
351 if (is_indexed) {
352 if (g_state.geometry_pipeline.NeedIndexInput()) {
353 g_state.geometry_pipeline.SubmitIndex(vertex);
354 continue;
355 }
356
357 if (g_debug_context && Pica::g_debug_context->recorder) {
358 int size = index_u16 ? 2 : 1;
359 memory_accesses.AddAccess(base_address + index_info.offset + size * index,
360 size);
361 }
362
363 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
364 if (vertex == vertex_cache_ids[i]) {
365 vs_output = vertex_cache[i];
366 vertex_cache_hit = true;
367 break;
368 }
369 }
370 }
371
372 if (!vertex_cache_hit) {
373 // Initialize data for the current vertex
374 Shader::AttributeBuffer input;
375 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
376
377 // Send to vertex shader
378 if (g_debug_context)
379 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
380 (void*)&input);
381 shader_unit.LoadInput(regs.vs, input);
382 shader_engine->Run(g_state.vs, shader_unit);
383 shader_unit.WriteOutput(regs.vs, vs_output);
384
385 if (is_indexed) {
386 vertex_cache[vertex_cache_pos] = vs_output;
387 vertex_cache_ids[vertex_cache_pos] = vertex;
388 vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
389 }
390 }
391
392 // Send to geometry pipeline
393 g_state.geometry_pipeline.SubmitVertex(vs_output);
394 }
395
396 for (auto& range : memory_accesses.ranges) {
397 g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
398 range.second, range.first);
399 }
400
401 VideoCore::g_renderer->Rasterizer()->DrawTriangles();
402 if (g_debug_context) {
403 g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
404 }
405
406 break; 412 break;
407 }
408 413
409 case PICA_REG_INDEX(gs.bool_uniforms): 414 case PICA_REG_INDEX(gs.bool_uniforms):
410 WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value()); 415 WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value());