summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Henrik Rydgard, 2016-04-28 21:43:49 +0200
committer: Henrik Rydgard, 2016-04-28 22:21:39 +0200
commit: 251f29dd7fa530997cd6d27a8db28c4a39efd127 (patch)
tree: 186abc57e94e88f8d52b9f1410a223fcd37ce27d /src
parent: Don't keep base_address in the loader, it doesn't belong there (with it, the ... (diff)
download: yuzu-251f29dd7fa530997cd6d27a8db28c4a39efd127.tar.gz
yuzu-251f29dd7fa530997cd6d27a8db28c4a39efd127.tar.xz
yuzu-251f29dd7fa530997cd6d27a8db28c4a39efd127.zip
Optimize the vertex loader, nearly doubling its speed.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/vertex_loader.cpp 85
-rw-r--r-- src/video_core/vertex_loader.h 1
2 files changed, 54 insertions, 32 deletions
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
index c2630d9eb..38682d2be 100644
--- a/src/video_core/vertex_loader.cpp
+++ b/src/video_core/vertex_loader.cpp
@@ -46,13 +46,11 @@ void VertexLoader::Setup(const Pica::Regs& regs) {
46 46
47 u32 attribute_index = loader_config.GetComponent(component); 47 u32 attribute_index = loader_config.GetComponent(component);
48 if (attribute_index < 12) { 48 if (attribute_index < 12) {
49 int element_size = attribute_config.GetElementSizeInBytes(attribute_index); 49 offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index));
50 offset = Common::AlignUp(offset, element_size);
51 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; 50 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
52 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); 51 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
53 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); 52 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
54 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); 53 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
55 vertex_attribute_element_size[attribute_index] = element_size;
56 offset += attribute_config.GetStride(attribute_index); 54 offset += attribute_config.GetStride(attribute_index);
57 } else if (attribute_index < 16) { 55 } else if (attribute_index < 16) {
58 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively 56 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
@@ -68,38 +66,63 @@ void VertexLoader::Setup(const Pica::Regs& regs) {
68void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) { 66void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) {
69 for (int i = 0; i < num_total_attributes; ++i) { 67 for (int i = 0; i < num_total_attributes; ++i) {
70 if (vertex_attribute_elements[i] != 0) { 68 if (vertex_attribute_elements[i] != 0) {
71 // Default attribute values set if array elements have < 4 components. This
72 // is *not* carried over from the default attribute settings even if they're
73 // enabled for this attribute.
74 static const float24 zero = float24::FromFloat32(0.0f);
75 static const float24 one = float24::FromFloat32(1.0f);
76 input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
77
78 // Load per-vertex data from the loader arrays 69 // Load per-vertex data from the loader arrays
79 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 70 u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
80 u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; 71
81 const u8* srcdata = Memory::GetPhysicalPointer(source_addr); 72 if (g_debug_context && Pica::g_debug_context->recorder) {
82 73 memory_accesses.AddAccess(source_addr,
83 if (g_debug_context && Pica::g_debug_context->recorder) { 74 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
84 memory_accesses.AddAccess(source_addr, 75 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
85 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 76 }
86 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); 77
78 switch (vertex_attribute_formats[i]) {
79 case Regs::VertexAttributeFormat::BYTE:
80 {
81 const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
82 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
83 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
87 } 84 }
85 break;
86 }
87 case Regs::VertexAttributeFormat::UBYTE:
88 {
89 const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
90 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
91 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
92 }
93 break;
94 }
95 case Regs::VertexAttributeFormat::SHORT:
96 {
97 const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
98 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
99 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
100 }
101 break;
102 }
103 case Regs::VertexAttributeFormat::FLOAT:
104 {
105 const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
106 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
107 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
108 }
109 break;
110 }
111 }
88 112
89 const float srcval = 113 // Default attribute values set if array elements have < 4 components. This
90 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) : 114 // is *not* carried over from the default attribute settings even if they're
91 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) : 115 // enabled for this attribute.
92 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) : 116 for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) {
93 *reinterpret_cast<const float*>(srcdata); 117 input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
94
95 input.attr[i][comp] = float24::FromFloat32(srcval);
96 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
97 comp, i, vertex, index,
98 base_address,
99 vertex_attribute_sources[i],
100 vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
101 input.attr[i][comp].ToFloat32());
102 } 118 }
119
120 LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
121 vertex_attribute_elements[i], i, vertex, index,
122 base_address,
123 vertex_attribute_sources[i],
124 vertex_attribute_strides[i] * vertex,
125 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
103 } else if (vertex_attribute_is_default[i]) { 126 } else if (vertex_attribute_is_default[i]) {
104 // Load the default attribute if we're configured to do so 127 // Load the default attribute if we're configured to do so
105 input.attr[i] = g_state.vs.default_attributes[i]; 128 input.attr[i] = g_state.vs.default_attributes[i];
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
index 40c5e83c9..7267ea9c6 100644
--- a/src/video_core/vertex_loader.h
+++ b/src/video_core/vertex_loader.h
@@ -47,7 +47,6 @@ private:
47 u32 vertex_attribute_strides[16] = {}; 47 u32 vertex_attribute_strides[16] = {};
48 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; 48 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
49 u32 vertex_attribute_elements[16] = {}; 49 u32 vertex_attribute_elements[16] = {};
50 u32 vertex_attribute_element_size[16] = {};
51 bool vertex_attribute_is_default[16]; 50 bool vertex_attribute_is_default[16];
52 int num_total_attributes; 51 int num_total_attributes;
53}; 52};