summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Henrik Rydgard2016-04-28 19:01:47 +0200
committerGravatar Henrik Rydgard2016-04-28 19:05:55 +0200
commit47ff00881703eeab03d32e60289ac34b7f4a7994 (patch)
treeb3260ea48aba0cf7feee3cd338c6676dd4a3d604 /src
parentRemove late accesses to attribute_config (diff)
downloadyuzu-47ff00881703eeab03d32e60289ac34b7f4a7994.tar.gz
yuzu-47ff00881703eeab03d32e60289ac34b7f4a7994.tar.xz
yuzu-47ff00881703eeab03d32e60289ac34b7f4a7994.zip
Refactor: Extract VertexLoader from command_processor.cpp.
Preparation for a vertex-loader concept similar to the ones used by Dolphin and PPSSPP: once extracted, vertex loaders can later be JIT-compiled and cached.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/command_processor.cpp134
-rw-r--r--src/video_core/shader/shader.h2
-rw-r--r--src/video_core/vertex_loader.cpp119
-rw-r--r--src/video_core/vertex_loader.h53
5 files changed, 185 insertions, 125 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 76cfd4f7d..de4082b1f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRCS
16 shader/shader_interpreter.cpp 16 shader/shader_interpreter.cpp
17 swrasterizer.cpp 17 swrasterizer.cpp
18 utils.cpp 18 utils.cpp
19 vertex_loader.cpp
19 video_core.cpp 20 video_core.cpp
20 ) 21 )
21 22
@@ -43,6 +44,7 @@ set(HEADERS
43 shader/shader_interpreter.h 44 shader/shader_interpreter.h
44 swrasterizer.h 45 swrasterizer.h
45 utils.h 46 utils.h
47 vertex_loader.h
46 video_core.h 48 video_core.h
47 ) 49 )
48 50
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 8030d17a7..8c3e982a3 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -22,6 +22,7 @@
22#include "video_core/video_core.h" 22#include "video_core/video_core.h"
23#include "video_core/debug_utils/debug_utils.h" 23#include "video_core/debug_utils/debug_utils.h"
24#include "video_core/shader/shader_interpreter.h" 24#include "video_core/shader/shader_interpreter.h"
25#include "video_core/vertex_loader.h"
25 26
26namespace Pica { 27namespace Pica {
27 28
@@ -192,62 +193,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
192#if PICA_LOG_TEV 193#if PICA_LOG_TEV
193 DebugUtils::DumpTevStageConfig(regs.GetTevStages()); 194 DebugUtils::DumpTevStageConfig(regs.GetTevStages());
194#endif 195#endif
195
196 if (g_debug_context) 196 if (g_debug_context)
197 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); 197 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
198 198
199 const auto& attribute_config = regs.vertex_attributes; 199 // Processes information about internal vertex attributes to figure out how a vertex is loaded.
200 const u32 base_address = attribute_config.GetPhysicalBaseAddress(); 200 // Later, these can be compiled and cached.
201 int num_total_attributes = attribute_config.GetNumTotalAttributes(); 201 VertexLoader loader;
202 202 loader.Setup(regs);
203 // Information about internal vertex attributes
204 u32 vertex_attribute_sources[16];
205 boost::fill(vertex_attribute_sources, 0xdeadbeef);
206 u32 vertex_attribute_strides[16] = {};
207 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
208
209 u32 vertex_attribute_elements[16] = {};
210 u32 vertex_attribute_element_size[16] = {};
211 bool vertex_attribute_default[16] = {};
212 // Setup attribute data from loaders
213 for (int loader = 0; loader < 12; ++loader) {
214 const auto& loader_config = attribute_config.attribute_loaders[loader];
215
216 u32 offset = 0;
217
218 // TODO: What happens if a loader overwrites a previous one's data?
219 for (unsigned component = 0; component < loader_config.component_count; ++component) {
220 if (component >= 12) {
221 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
222 continue;
223 }
224
225 u32 attribute_index = loader_config.GetComponent(component);
226 if (attribute_index < 12) {
227 int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
228 offset = Common::AlignUp(offset, element_size);
229 vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
230 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
231 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
232 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
233 vertex_attribute_element_size[attribute_index] = element_size;
234 vertex_attribute_default[attribute_index] = attribute_config.IsDefaultAttribute(attribute_index);
235 offset += attribute_config.GetStride(attribute_index);
236 } else if (attribute_index < 16) {
237 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
238 offset = Common::AlignUp(offset, 4);
239 offset += (attribute_index - 11) * 4;
240 } else {
241 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
242 }
243 }
244 }
245 203
246 // Load vertices 204 // Load vertices
247 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); 205 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
248 206
249 const auto& index_info = regs.index_array; 207 const auto& index_info = regs.index_array;
250 const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); 208 const u8* index_address_8 = Memory::GetPhysicalPointer(loader.GetPhysicalBaseAddress() + index_info.offset);
251 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); 209 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
252 bool index_u16 = index_info.format != 0; 210 bool index_u16 = index_info.format != 0;
253 211
@@ -265,32 +223,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
265 } 223 }
266 } 224 }
267 225
268 class { 226 MemoryAccesses memory_accesses;
269 /// Combine overlapping and close ranges
270 void SimplifyRanges() {
271 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
272 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
273 auto it2 = std::next(it);
274 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
275 it->second = std::max(it->second, it2->first + it2->second - it->first);
276 it2 = ranges.erase(it2);
277 }
278 }
279 }
280
281 public:
282 /// Record a particular memory access in the list
283 void AddAccess(u32 paddr, u32 size) {
284 // Create new range or extend existing one
285 ranges[paddr] = std::max(ranges[paddr], size);
286
287 // Simplify ranges...
288 SimplifyRanges();
289 }
290
291 /// Map of accessed ranges (mapping start address to range size)
292 std::map<u32, u32> ranges;
293 } memory_accesses;
294 227
295 // Simple circular-replacement vertex cache 228 // Simple circular-replacement vertex cache
296 // The size has been tuned for optimal balance between hit-rate and the cost of lookup 229 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
@@ -319,7 +252,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
319 if (is_indexed) { 252 if (is_indexed) {
320 if (g_debug_context && Pica::g_debug_context->recorder) { 253 if (g_debug_context && Pica::g_debug_context->recorder) {
321 int size = index_u16 ? 2 : 1; 254 int size = index_u16 ? 2 : 1;
322 memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); 255 memory_accesses.AddAccess(loader.GetPhysicalBaseAddress() + index_info.offset + size * index, size);
323 } 256 }
324 257
325 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { 258 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
@@ -334,60 +267,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
334 if (!vertex_cache_hit) { 267 if (!vertex_cache_hit) {
335 // Initialize data for the current vertex 268 // Initialize data for the current vertex
336 Shader::InputVertex input; 269 Shader::InputVertex input;
337 270 loader.LoadVertex(index, vertex, input, memory_accesses);
338 for (int i = 0; i < num_total_attributes; ++i) {
339 if (vertex_attribute_elements[i] != 0) {
340 // Default attribute values set if array elements have < 4 components. This
341 // is *not* carried over from the default attribute settings even if they're
342 // enabled for this attribute.
343 static const float24 zero = float24::FromFloat32(0.0f);
344 static const float24 one = float24::FromFloat32(1.0f);
345 input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
346
347 // Load per-vertex data from the loader arrays
348 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
349 u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
350 const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
351
352 if (g_debug_context && Pica::g_debug_context->recorder) {
353 memory_accesses.AddAccess(source_addr,
354 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
355 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
356 }
357
358 const float srcval =
359 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) :
360 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
361 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
362 *reinterpret_cast<const float*>(srcdata);
363
364 input.attr[i][comp] = float24::FromFloat32(srcval);
365 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
366 comp, i, vertex, index,
367 base_address,
368 vertex_attribute_sources[i] - base_address,
369 vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
370 input.attr[i][comp].ToFloat32());
371 }
372 } else if (vertex_attribute_default[i]) {
373 // Load the default attribute if we're configured to do so
374 input.attr[i] = g_state.vs.default_attributes[i];
375 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
376 i, vertex, index,
377 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
378 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
379 } else {
380 // TODO(yuriks): In this case, no data gets loaded and the vertex
381 // remains with the last value it had. This isn't currently maintained
382 // as global state, however, and so won't work in Citra yet.
383 }
384 }
385 271
386 if (g_debug_context) 272 if (g_debug_context)
387 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); 273 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
388 274
389 // Send to vertex shader 275 // Send to vertex shader
390 output = Shader::Run(shader_unit, input, num_total_attributes); 276 output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes());
391 277
392 if (is_indexed) { 278 if (is_indexed) {
393 vertex_cache[vertex_cache_pos] = output; 279 vertex_cache[vertex_cache_pos] = output;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 9c5bd97bd..9ce9344d2 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -25,7 +25,7 @@ namespace Pica {
25namespace Shader { 25namespace Shader {
26 26
27struct InputVertex { 27struct InputVertex {
28 Math::Vec4<float24> attr[16]; 28 alignas(16) Math::Vec4<float24> attr[16];
29}; 29};
30 30
31struct OutputVertex { 31struct OutputVertex {
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
new file mode 100644
index 000000000..258002b07
--- /dev/null
+++ b/src/video_core/vertex_loader.cpp
@@ -0,0 +1,119 @@
1#include <cmath>
2#include <string>
3
4#include "boost/range/algorithm/fill.hpp"
5
6#include "common/assert.h"
7#include "common/alignment.h"
8#include "common/bit_field.h"
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "common/logging/log.h"
12
13#include "core/memory.h"
14
15#include "debug_utils/debug_utils.h"
16
17#include "pica.h"
18#include "pica_state.h"
19#include "pica_types.h"
20#include "vertex_loader.h"
21
22namespace Pica {
23
24void VertexLoader::Setup(const Pica::Regs &regs) {
25 const auto& attribute_config = regs.vertex_attributes;
26 base_address = attribute_config.GetPhysicalBaseAddress();
27 num_total_attributes = attribute_config.GetNumTotalAttributes();
28
29 boost::fill(vertex_attribute_sources, 0xdeadbeef);
30
31 for (int i = 0; i < 16; i++) {
32 vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);
33 }
34
35 // Setup attribute data from loaders
36 for (int loader = 0; loader < 12; ++loader) {
37 const auto& loader_config = attribute_config.attribute_loaders[loader];
38
39 u32 offset = 0;
40
41 // TODO: What happens if a loader overwrites a previous one's data?
42 for (unsigned component = 0; component < loader_config.component_count; ++component) {
43 if (component >= 12) {
44 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
45 continue;
46 }
47
48 u32 attribute_index = loader_config.GetComponent(component);
49 if (attribute_index < 12) {
50 int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
51 offset = Common::AlignUp(offset, element_size);
52 vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
53 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
54 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
55 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
56 vertex_attribute_element_size[attribute_index] = element_size;
57 offset += attribute_config.GetStride(attribute_index);
58 } else if (attribute_index < 16) {
59 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
60 offset = Common::AlignUp(offset, 4);
61 offset += (attribute_index - 11) * 4;
62 } else {
63 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
64 }
65 }
66 }
67}
68
69void VertexLoader::LoadVertex(int index, int vertex, Shader::InputVertex &input, MemoryAccesses &memory_accesses) {
70 for (int i = 0; i < num_total_attributes; ++i) {
71 if (vertex_attribute_elements[i] != 0) {
72 // Default attribute values set if array elements have < 4 components. This
73 // is *not* carried over from the default attribute settings even if they're
74 // enabled for this attribute.
75 static const float24 zero = float24::FromFloat32(0.0f);
76 static const float24 one = float24::FromFloat32(1.0f);
77 input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
78
79 // Load per-vertex data from the loader arrays
80 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
81 u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
82 const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
83
84 if (g_debug_context && Pica::g_debug_context->recorder) {
85 memory_accesses.AddAccess(source_addr,
86 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
87 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
88 }
89
90 const float srcval =
91 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) :
92 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
93 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
94 *reinterpret_cast<const float*>(srcdata);
95
96 input.attr[i][comp] = float24::FromFloat32(srcval);
97 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
98 comp, i, vertex, index,
99 base_address,
100 vertex_attribute_sources[i] - base_address,
101 vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
102 input.attr[i][comp].ToFloat32());
103 }
104 } else if (vertex_attribute_is_default[i]) {
105 // Load the default attribute if we're configured to do so
106 input.attr[i] = g_state.vs.default_attributes[i];
107 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
108 i, vertex, index,
109 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
110 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
111 } else {
112 // TODO(yuriks): In this case, no data gets loaded and the vertex
113 // remains with the last value it had. This isn't currently maintained
114 // as global state, however, and so won't work in Citra yet.
115 }
116 }
117}
118
119} // namespace Pica \ No newline at end of file
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
new file mode 100644
index 000000000..4ff62d97b
--- /dev/null
+++ b/src/video_core/vertex_loader.h
@@ -0,0 +1,53 @@
1#pragma once
2
3#include "video_core/pica.h"
4#include "video_core/shader/shader.h"
5
6namespace Pica {
7
8class MemoryAccesses {
9 /// Combine overlapping and close ranges
10 void SimplifyRanges() {
11 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
12 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
13 auto it2 = std::next(it);
14 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
15 it->second = std::max(it->second, it2->first + it2->second - it->first);
16 it2 = ranges.erase(it2);
17 }
18 }
19 }
20
21public:
22 /// Record a particular memory access in the list
23 void AddAccess(u32 paddr, u32 size) {
24 // Create new range or extend existing one
25 ranges[paddr] = std::max(ranges[paddr], size);
26
27 // Simplify ranges...
28 SimplifyRanges();
29 }
30
31 /// Map of accessed ranges (mapping start address to range size)
32 std::map<u32, u32> ranges;
33};
34
35class VertexLoader {
36public:
37 void Setup(const Pica::Regs &regs);
38 void LoadVertex(int index, int vertex, Shader::InputVertex &input, MemoryAccesses &memory_accesses);
39
40 u32 GetPhysicalBaseAddress() const { return base_address; }
41 int GetNumTotalAttributes() const { return num_total_attributes; }
42private:
43 u32 vertex_attribute_sources[16];
44 u32 vertex_attribute_strides[16] = {};
45 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
46 u32 vertex_attribute_elements[16] = {};
47 u32 vertex_attribute_element_size[16] = {};
48 bool vertex_attribute_is_default[16];
49 u32 base_address;
50 int num_total_attributes;
51};
52
53} // namespace Pica