diff options
| author | 2016-04-28 21:43:49 +0200 | |
|---|---|---|
| committer | 2016-04-28 22:21:39 +0200 | |
| commit | 251f29dd7fa530997cd6d27a8db28c4a39efd127 (patch) | |
| tree | 186abc57e94e88f8d52b9f1410a223fcd37ce27d /src | |
| parent | Don't keep base_address in the loader, it doesn't belong there (with it, the ... (diff) | |
| download | yuzu-251f29dd7fa530997cd6d27a8db28c4a39efd127.tar.gz yuzu-251f29dd7fa530997cd6d27a8db28c4a39efd127.tar.xz yuzu-251f29dd7fa530997cd6d27a8db28c4a39efd127.zip | |
Optimize the vertex loader, nearly doubling its speed.
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/vertex_loader.cpp | 85 | ||||
| -rw-r--r-- | src/video_core/vertex_loader.h | 1 |
2 files changed, 54 insertions, 32 deletions
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index c2630d9eb..38682d2be 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp | |||
| @@ -46,13 +46,11 @@ void VertexLoader::Setup(const Pica::Regs& regs) { | |||
| 46 | 46 | ||
| 47 | u32 attribute_index = loader_config.GetComponent(component); | 47 | u32 attribute_index = loader_config.GetComponent(component); |
| 48 | if (attribute_index < 12) { | 48 | if (attribute_index < 12) { |
| 49 | int element_size = attribute_config.GetElementSizeInBytes(attribute_index); | 49 | offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index)); |
| 50 | offset = Common::AlignUp(offset, element_size); | ||
| 51 | vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; | 50 | vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; |
| 52 | vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); | 51 | vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); |
| 53 | vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); | 52 | vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); |
| 54 | vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); | 53 | vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); |
| 55 | vertex_attribute_element_size[attribute_index] = element_size; | ||
| 56 | offset += attribute_config.GetStride(attribute_index); | 54 | offset += attribute_config.GetStride(attribute_index); |
| 57 | } else if (attribute_index < 16) { | 55 | } else if (attribute_index < 16) { |
| 58 | // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively | 56 | // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively |
| @@ -68,38 +66,63 @@ void VertexLoader::Setup(const Pica::Regs& regs) { | |||
| 68 | void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) { | 66 | void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) { |
| 69 | for (int i = 0; i < num_total_attributes; ++i) { | 67 | for (int i = 0; i < num_total_attributes; ++i) { |
| 70 | if (vertex_attribute_elements[i] != 0) { | 68 | if (vertex_attribute_elements[i] != 0) { |
| 71 | // Default attribute values set if array elements have < 4 components. This | ||
| 72 | // is *not* carried over from the default attribute settings even if they're | ||
| 73 | // enabled for this attribute. | ||
| 74 | static const float24 zero = float24::FromFloat32(0.0f); | ||
| 75 | static const float24 one = float24::FromFloat32(1.0f); | ||
| 76 | input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one); | ||
| 77 | |||
| 78 | // Load per-vertex data from the loader arrays | 69 | // Load per-vertex data from the loader arrays |
| 79 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | 70 | u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; |
| 80 | u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; | 71 | |
| 81 | const u8* srcdata = Memory::GetPhysicalPointer(source_addr); | 72 | if (g_debug_context && Pica::g_debug_context->recorder) { |
| 82 | 73 | memory_accesses.AddAccess(source_addr, | |
| 83 | if (g_debug_context && Pica::g_debug_context->recorder) { | 74 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 |
| 84 | memory_accesses.AddAccess(source_addr, | 75 | : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); |
| 85 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 | 76 | } |
| 86 | : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); | 77 | |
| 78 | switch (vertex_attribute_formats[i]) { | ||
| 79 | case Regs::VertexAttributeFormat::BYTE: | ||
| 80 | { | ||
| 81 | const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); | ||
| 82 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||
| 83 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||
| 87 | } | 84 | } |
| 85 | break; | ||
| 86 | } | ||
| 87 | case Regs::VertexAttributeFormat::UBYTE: | ||
| 88 | { | ||
| 89 | const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); | ||
| 90 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||
| 91 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||
| 92 | } | ||
| 93 | break; | ||
| 94 | } | ||
| 95 | case Regs::VertexAttributeFormat::SHORT: | ||
| 96 | { | ||
| 97 | const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); | ||
| 98 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||
| 99 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||
| 100 | } | ||
| 101 | break; | ||
| 102 | } | ||
| 103 | case Regs::VertexAttributeFormat::FLOAT: | ||
| 104 | { | ||
| 105 | const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); | ||
| 106 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||
| 107 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||
| 108 | } | ||
| 109 | break; | ||
| 110 | } | ||
| 111 | } | ||
| 88 | 112 | ||
| 89 | const float srcval = | 113 | // Default attribute values set if array elements have < 4 components. This |
| 90 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) : | 114 | // is *not* carried over from the default attribute settings even if they're |
| 91 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) : | 115 | // enabled for this attribute. |
| 92 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) : | 116 | for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { |
| 93 | *reinterpret_cast<const float*>(srcdata); | 117 | input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); |
| 94 | |||
| 95 | input.attr[i][comp] = float24::FromFloat32(srcval); | ||
| 96 | LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", | ||
| 97 | comp, i, vertex, index, | ||
| 98 | base_address, | ||
| 99 | vertex_attribute_sources[i], | ||
| 100 | vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], | ||
| 101 | input.attr[i][comp].ToFloat32()); | ||
| 102 | } | 118 | } |
| 119 | |||
| 120 | LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f", | ||
| 121 | vertex_attribute_elements[i], i, vertex, index, | ||
| 122 | base_address, | ||
| 123 | vertex_attribute_sources[i], | ||
| 124 | vertex_attribute_strides[i] * vertex, | ||
| 125 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | ||
| 103 | } else if (vertex_attribute_is_default[i]) { | 126 | } else if (vertex_attribute_is_default[i]) { |
| 104 | // Load the default attribute if we're configured to do so | 127 | // Load the default attribute if we're configured to do so |
| 105 | input.attr[i] = g_state.vs.default_attributes[i]; | 128 | input.attr[i] = g_state.vs.default_attributes[i]; |
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h index 40c5e83c9..7267ea9c6 100644 --- a/src/video_core/vertex_loader.h +++ b/src/video_core/vertex_loader.h | |||
| @@ -47,7 +47,6 @@ private: | |||
| 47 | u32 vertex_attribute_strides[16] = {}; | 47 | u32 vertex_attribute_strides[16] = {}; |
| 48 | Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; | 48 | Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; |
| 49 | u32 vertex_attribute_elements[16] = {}; | 49 | u32 vertex_attribute_elements[16] = {}; |
| 50 | u32 vertex_attribute_element_size[16] = {}; | ||
| 51 | bool vertex_attribute_is_default[16]; | 50 | bool vertex_attribute_is_default[16]; |
| 52 | int num_total_attributes; | 51 | int num_total_attributes; |
| 53 | }; | 52 | }; |