summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar bunnei2016-04-29 09:42:47 -0400
committerGravatar bunnei2016-04-29 09:42:47 -0400
commit90243c56fb90d7d74cbef40da3eec97d967c10a2 (patch)
tree94d223001196ca9b774a8d018535ba2be8de1b01 /src
parentCommon: Remove section measurement from profiler (#1731) (diff)
parentMove and rename the MemoryAccesses class to MemoryAccessTracker. (diff)
downloadyuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.gz
yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.xz
yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.zip
Merge pull request #1730 from hrydgard/vertex-loader
* Remove late accesses to attribute_config
* Refactor: Extract VertexLoader from command_processor.cpp. Preparation for a similar concept to Dolphin or PPSSPP. These can be JIT-ed and cached.
* Move "&" to their proper place, add missing includes and make some properly relative.
* Don't keep base_address in the loader, it doesn't belong there (with it, the loader can't be cached).
* Optimize the vertex loader, nearly doubling its speed.
* Debugger fix
* Move and rename the MemoryAccesses class to MemoryAccessTracker.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/command_processor.cpp129
-rw-r--r--src/video_core/debug_utils/debug_utils.h30
-rw-r--r--src/video_core/shader/shader.h2
-rw-r--r--src/video_core/vertex_loader.cpp140
-rw-r--r--src/video_core/vertex_loader.h28
6 files changed, 210 insertions, 121 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 76cfd4f7d..de4082b1f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRCS
16 shader/shader_interpreter.cpp 16 shader/shader_interpreter.cpp
17 swrasterizer.cpp 17 swrasterizer.cpp
18 utils.cpp 18 utils.cpp
19 vertex_loader.cpp
19 video_core.cpp 20 video_core.cpp
20 ) 21 )
21 22
@@ -43,6 +44,7 @@ set(HEADERS
43 shader/shader_interpreter.h 44 shader/shader_interpreter.h
44 swrasterizer.h 45 swrasterizer.h
45 utils.h 46 utils.h
47 vertex_loader.h
46 video_core.h 48 video_core.h
47 ) 49 )
48 50
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 97ba8214e..58883e374 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -21,6 +21,7 @@
21#include "video_core/video_core.h" 21#include "video_core/video_core.h"
22#include "video_core/debug_utils/debug_utils.h" 22#include "video_core/debug_utils/debug_utils.h"
23#include "video_core/shader/shader_interpreter.h" 23#include "video_core/shader/shader_interpreter.h"
24#include "video_core/vertex_loader.h"
24 25
25namespace Pica { 26namespace Pica {
26 27
@@ -188,54 +189,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
188#if PICA_LOG_TEV 189#if PICA_LOG_TEV
189 DebugUtils::DumpTevStageConfig(regs.GetTevStages()); 190 DebugUtils::DumpTevStageConfig(regs.GetTevStages());
190#endif 191#endif
191
192 if (g_debug_context) 192 if (g_debug_context)
193 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); 193 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
194 194
195 const auto& attribute_config = regs.vertex_attributes; 195 // Processes information about internal vertex attributes to figure out how a vertex is loaded.
196 const u32 base_address = attribute_config.GetPhysicalBaseAddress(); 196 // Later, these can be compiled and cached.
197 197 VertexLoader loader;
198 // Information about internal vertex attributes 198 const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
199 u32 vertex_attribute_sources[16]; 199 loader.Setup(regs);
200 boost::fill(vertex_attribute_sources, 0xdeadbeef);
201 u32 vertex_attribute_strides[16] = {};
202 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
203
204 u32 vertex_attribute_elements[16] = {};
205 u32 vertex_attribute_element_size[16] = {};
206
207 // Setup attribute data from loaders
208 for (int loader = 0; loader < 12; ++loader) {
209 const auto& loader_config = attribute_config.attribute_loaders[loader];
210
211 u32 offset = 0;
212
213 // TODO: What happens if a loader overwrites a previous one's data?
214 for (unsigned component = 0; component < loader_config.component_count; ++component) {
215 if (component >= 12) {
216 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
217 continue;
218 }
219
220 u32 attribute_index = loader_config.GetComponent(component);
221 if (attribute_index < 12) {
222 int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
223 offset = Common::AlignUp(offset, element_size);
224 vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
225 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
226 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
227 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
228 vertex_attribute_element_size[attribute_index] = element_size;
229 offset += attribute_config.GetStride(attribute_index);
230 } else if (attribute_index < 16) {
231 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
232 offset = Common::AlignUp(offset, 4);
233 offset += (attribute_index - 11) * 4;
234 } else {
235 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
236 }
237 }
238 }
239 200
240 // Load vertices 201 // Load vertices
241 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); 202 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
@@ -259,32 +220,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
259 } 220 }
260 } 221 }
261 222
262 class { 223 DebugUtils::MemoryAccessTracker memory_accesses;
263 /// Combine overlapping and close ranges
264 void SimplifyRanges() {
265 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
266 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
267 auto it2 = std::next(it);
268 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
269 it->second = std::max(it->second, it2->first + it2->second - it->first);
270 it2 = ranges.erase(it2);
271 }
272 }
273 }
274
275 public:
276 /// Record a particular memory access in the list
277 void AddAccess(u32 paddr, u32 size) {
278 // Create new range or extend existing one
279 ranges[paddr] = std::max(ranges[paddr], size);
280
281 // Simplify ranges...
282 SimplifyRanges();
283 }
284
285 /// Map of accessed ranges (mapping start address to range size)
286 std::map<u32, u32> ranges;
287 } memory_accesses;
288 224
289 // Simple circular-replacement vertex cache 225 // Simple circular-replacement vertex cache
290 // The size has been tuned for optimal balance between hit-rate and the cost of lookup 226 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
@@ -328,60 +264,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
328 if (!vertex_cache_hit) { 264 if (!vertex_cache_hit) {
329 // Initialize data for the current vertex 265 // Initialize data for the current vertex
330 Shader::InputVertex input; 266 Shader::InputVertex input;
331 267 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
332 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
333 if (vertex_attribute_elements[i] != 0) {
334 // Default attribute values set if array elements have < 4 components. This
335 // is *not* carried over from the default attribute settings even if they're
336 // enabled for this attribute.
337 static const float24 zero = float24::FromFloat32(0.0f);
338 static const float24 one = float24::FromFloat32(1.0f);
339 input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
340
341 // Load per-vertex data from the loader arrays
342 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
343 u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
344 const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
345
346 if (g_debug_context && Pica::g_debug_context->recorder) {
347 memory_accesses.AddAccess(source_addr,
348 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
349 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
350 }
351
352 const float srcval =
353 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) :
354 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
355 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
356 *reinterpret_cast<const float*>(srcdata);
357
358 input.attr[i][comp] = float24::FromFloat32(srcval);
359 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
360 comp, i, vertex, index,
361 attribute_config.GetPhysicalBaseAddress(),
362 vertex_attribute_sources[i] - base_address,
363 vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
364 input.attr[i][comp].ToFloat32());
365 }
366 } else if (attribute_config.IsDefaultAttribute(i)) {
367 // Load the default attribute if we're configured to do so
368 input.attr[i] = g_state.vs.default_attributes[i];
369 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
370 i, vertex, index,
371 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
372 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
373 } else {
374 // TODO(yuriks): In this case, no data gets loaded and the vertex
375 // remains with the last value it had. This isn't currently maintained
376 // as global state, however, and so won't work in Citra yet.
377 }
378 }
379 268
380 if (g_debug_context) 269 if (g_debug_context)
381 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); 270 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
382 271
383 // Send to vertex shader 272 // Send to vertex shader
384 output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes()); 273 output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes());
385 274
386 if (is_indexed) { 275 if (is_indexed) {
387 vertex_cache[vertex_cache_pos] = output; 276 vertex_cache[vertex_cache_pos] = output;
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 56f9bd958..dd0828cee 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -216,6 +216,36 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
216 216
217void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); 217void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
218 218
219/**
220 * Used in the vertex loader to merge access records. TODO: Investigate if actually useful.
221 */
222class MemoryAccessTracker {
223 /// Combine overlapping and close ranges
224 void SimplifyRanges() {
225 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
226 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
227 auto it2 = std::next(it);
228 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
229 it->second = std::max(it->second, it2->first + it2->second - it->first);
230 it2 = ranges.erase(it2);
231 }
232 }
233 }
234
235public:
236 /// Record a particular memory access in the list
237 void AddAccess(u32 paddr, u32 size) {
238 // Create new range or extend existing one
239 ranges[paddr] = std::max(ranges[paddr], size);
240
241 // Simplify ranges...
242 SimplifyRanges();
243 }
244
245 /// Map of accessed ranges (mapping start address to range size)
246 std::map<u32, u32> ranges;
247};
248
219} // namespace 249} // namespace
220 250
221} // namespace 251} // namespace
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 9c5bd97bd..9ce9344d2 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -25,7 +25,7 @@ namespace Pica {
25namespace Shader { 25namespace Shader {
26 26
27struct InputVertex { 27struct InputVertex {
28 Math::Vec4<float24> attr[16]; 28 alignas(16) Math::Vec4<float24> attr[16];
29}; 29};
30 30
31struct OutputVertex { 31struct OutputVertex {
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
new file mode 100644
index 000000000..8a3d91896
--- /dev/null
+++ b/src/video_core/vertex_loader.cpp
@@ -0,0 +1,140 @@
1#include <cmath>
2#include <string>
3
4#include "boost/range/algorithm/fill.hpp"
5
6#include "common/assert.h"
7#include "common/alignment.h"
8#include "common/bit_field.h"
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "common/logging/log.h"
12
13#include "core/memory.h"
14
15#include "video_core/debug_utils/debug_utils.h"
16#include "video_core/pica.h"
17#include "video_core/pica_state.h"
18#include "video_core/pica_types.h"
19#include "video_core/vertex_loader.h"
20
21namespace Pica {
22
23void VertexLoader::Setup(const Pica::Regs& regs) {
24 const auto& attribute_config = regs.vertex_attributes;
25 num_total_attributes = attribute_config.GetNumTotalAttributes();
26
27 boost::fill(vertex_attribute_sources, 0xdeadbeef);
28
29 for (int i = 0; i < 16; i++) {
30 vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);
31 }
32
33 // Setup attribute data from loaders
34 for (int loader = 0; loader < 12; ++loader) {
35 const auto& loader_config = attribute_config.attribute_loaders[loader];
36
37 u32 offset = 0;
38
39 // TODO: What happens if a loader overwrites a previous one's data?
40 for (unsigned component = 0; component < loader_config.component_count; ++component) {
41 if (component >= 12) {
42 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
43 continue;
44 }
45
46 u32 attribute_index = loader_config.GetComponent(component);
47 if (attribute_index < 12) {
48 offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index));
49 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
50 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
51 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
52 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
53 offset += attribute_config.GetStride(attribute_index);
54 } else if (attribute_index < 16) {
55 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
56 offset = Common::AlignUp(offset, 4);
57 offset += (attribute_index - 11) * 4;
58 } else {
59 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
60 }
61 }
62 }
63}
64
65void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) {
66 for (int i = 0; i < num_total_attributes; ++i) {
67 if (vertex_attribute_elements[i] != 0) {
68 // Load per-vertex data from the loader arrays
69 u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
70
71 if (g_debug_context && Pica::g_debug_context->recorder) {
72 memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * (
73 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
74 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1));
75 }
76
77 switch (vertex_attribute_formats[i]) {
78 case Regs::VertexAttributeFormat::BYTE:
79 {
80 const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
81 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
82 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
83 }
84 break;
85 }
86 case Regs::VertexAttributeFormat::UBYTE:
87 {
88 const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
89 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
90 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
91 }
92 break;
93 }
94 case Regs::VertexAttributeFormat::SHORT:
95 {
96 const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
97 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
98 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
99 }
100 break;
101 }
102 case Regs::VertexAttributeFormat::FLOAT:
103 {
104 const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
105 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
106 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
107 }
108 break;
109 }
110 }
111
112 // Default attribute values set if array elements have < 4 components. This
113 // is *not* carried over from the default attribute settings even if they're
114 // enabled for this attribute.
115 for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) {
116 input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
117 }
118
119 LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
120 vertex_attribute_elements[i], i, vertex, index,
121 base_address,
122 vertex_attribute_sources[i],
123 vertex_attribute_strides[i] * vertex,
124 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
125 } else if (vertex_attribute_is_default[i]) {
126 // Load the default attribute if we're configured to do so
127 input.attr[i] = g_state.vs.default_attributes[i];
128 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
129 i, vertex, index,
130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
131 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
132 } else {
133 // TODO(yuriks): In this case, no data gets loaded and the vertex
134 // remains with the last value it had. This isn't currently maintained
135 // as global state, however, and so won't work in Citra yet.
136 }
137 }
138}
139
140} // namespace Pica \ No newline at end of file
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
new file mode 100644
index 000000000..ff42d1596
--- /dev/null
+++ b/src/video_core/vertex_loader.h
@@ -0,0 +1,28 @@
1#pragma once
2
3#include <iterator>
4#include <algorithm>
5
6#include "video_core/pica.h"
7#include "video_core/shader/shader.h"
8#include "video_core/debug_utils/debug_utils.h"
9
10namespace Pica {
11
12class VertexLoader {
13public:
14 void Setup(const Pica::Regs& regs);
15 void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses);
16
17 int GetNumTotalAttributes() const { return num_total_attributes; }
18
19private:
20 u32 vertex_attribute_sources[16];
21 u32 vertex_attribute_strides[16] = {};
22 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
23 u32 vertex_attribute_elements[16] = {};
24 bool vertex_attribute_is_default[16];
25 int num_total_attributes;
26};
27
28} // namespace Pica