From 47ff00881703eeab03d32e60289ac34b7f4a7994 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 28 Apr 2016 19:01:47 +0200 Subject: Refactor: Extract VertexLoader from command_processor.cpp. Preparation for a similar concept to Dolphin or PPSSPP. These can be JIT-ed and cached. --- src/video_core/vertex_loader.cpp | 119 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 src/video_core/vertex_loader.cpp (limited to 'src/video_core/vertex_loader.cpp') diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp new file mode 100644 index 000000000..258002b07 --- /dev/null +++ b/src/video_core/vertex_loader.cpp @@ -0,0 +1,119 @@ +#include +#include + +#include "boost/range/algorithm/fill.hpp" + +#include "common/assert.h" +#include "common/alignment.h" +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/logging/log.h" + +#include "core/memory.h" + +#include "debug_utils/debug_utils.h" + +#include "pica.h" +#include "pica_state.h" +#include "pica_types.h" +#include "vertex_loader.h" + +namespace Pica { + +void VertexLoader::Setup(const Pica::Regs ®s) { + const auto& attribute_config = regs.vertex_attributes; + base_address = attribute_config.GetPhysicalBaseAddress(); + num_total_attributes = attribute_config.GetNumTotalAttributes(); + + boost::fill(vertex_attribute_sources, 0xdeadbeef); + + for (int i = 0; i < 16; i++) { + vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i); + } + + // Setup attribute data from loaders + for (int loader = 0; loader < 12; ++loader) { + const auto& loader_config = attribute_config.attribute_loaders[loader]; + + u32 offset = 0; + + // TODO: What happens if a loader overwrites a previous one's data? 
+ for (unsigned component = 0; component < loader_config.component_count; ++component) { + if (component >= 12) { + LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); + continue; + } + + u32 attribute_index = loader_config.GetComponent(component); + if (attribute_index < 12) { + int element_size = attribute_config.GetElementSizeInBytes(attribute_index); + offset = Common::AlignUp(offset, element_size); + vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset; + vertex_attribute_strides[attribute_index] = static_cast(loader_config.byte_count); + vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); + vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); + vertex_attribute_element_size[attribute_index] = element_size; + offset += attribute_config.GetStride(attribute_index); + } else if (attribute_index < 16) { + // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively + offset = Common::AlignUp(offset, 4); + offset += (attribute_index - 11) * 4; + } else { + UNREACHABLE(); // This is truly unreachable due to the number of bits for each component + } + } + } +} + +void VertexLoader::LoadVertex(int index, int vertex, Shader::InputVertex &input, MemoryAccesses &memory_accesses) { + for (int i = 0; i < num_total_attributes; ++i) { + if (vertex_attribute_elements[i] != 0) { + // Default attribute values set if array elements have < 4 components. This + // is *not* carried over from the default attribute settings even if they're + // enabled for this attribute. 
+ static const float24 zero = float24::FromFloat32(0.0f); + static const float24 one = float24::FromFloat32(1.0f); + input.attr[i] = Math::Vec4(zero, zero, zero, one); + + // Load per-vertex data from the loader arrays + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; + const u8* srcdata = Memory::GetPhysicalPointer(source_addr); + + if (g_debug_context && Pica::g_debug_context->recorder) { + memory_accesses.AddAccess(source_addr, + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 + : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); + } + + const float srcval = + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) : + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) : + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 
*reinterpret_cast<const s16*>(srcdata) : + *reinterpret_cast<const float*>(srcdata); + + input.attr[i][comp] = float24::FromFloat32(srcval); + LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", + comp, i, vertex, index, + base_address, + vertex_attribute_sources[i] - base_address, + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], + input.attr[i][comp].ToFloat32()); + } + } else if (vertex_attribute_is_default[i]) { + // Load the default attribute if we're configured to do so + input.attr[i] = g_state.vs.default_attributes[i]; + LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", + i, vertex, index, + input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), + input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); + } else { + // TODO(yuriks): In this case, no data gets loaded and the vertex + // remains with the last value it had. This isn't currently maintained + // as global state, however, and so won't work in Citra yet. + } + } +} + +} // namespace Pica \ No newline at end of file -- cgit v1.2.3 From d00e2340c63c61ccb987c4ab0d76957f6cc84144 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 28 Apr 2016 19:40:11 +0200 Subject: Move "&" to their proper place, add missing includes and make some properly relative. 
--- src/video_core/vertex_loader.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/video_core/vertex_loader.cpp') diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index 258002b07..958201e5e 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp @@ -14,14 +14,14 @@ #include "debug_utils/debug_utils.h" -#include "pica.h" -#include "pica_state.h" -#include "pica_types.h" -#include "vertex_loader.h" +#include "video_core/pica.h" +#include "video_core/pica_state.h" +#include "video_core/pica_types.h" +#include "video_core/vertex_loader.h" namespace Pica { -void VertexLoader::Setup(const Pica::Regs &regs) { +void VertexLoader::Setup(const Pica::Regs& regs) { const auto& attribute_config = regs.vertex_attributes; base_address = attribute_config.GetPhysicalBaseAddress(); num_total_attributes = attribute_config.GetNumTotalAttributes(); @@ -66,7 +66,7 @@ void VertexLoader::Setup(const Pica::Regs &regs) { } } -void VertexLoader::LoadVertex(int index, int vertex, Shader::InputVertex &input, MemoryAccesses &memory_accesses) { +void VertexLoader::LoadVertex(int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) { for (int i = 0; i < num_total_attributes; ++i) { if (vertex_attribute_elements[i] != 0) { // Default attribute values set if array elements have < 4 components. This -- cgit v1.2.3 From 2403e86cbb5c49afeceb98f7c6e843da78dff415 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 28 Apr 2016 20:17:35 +0200 Subject: Don't keep base_address in the loader, it doesn't belong there (with it, the loader can't be cached). 
--- src/video_core/vertex_loader.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src/video_core/vertex_loader.cpp') diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index 958201e5e..c2630d9eb 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp @@ -23,7 +23,6 @@ namespace Pica { void VertexLoader::Setup(const Pica::Regs& regs) { const auto& attribute_config = regs.vertex_attributes; - base_address = attribute_config.GetPhysicalBaseAddress(); num_total_attributes = attribute_config.GetNumTotalAttributes(); boost::fill(vertex_attribute_sources, 0xdeadbeef); @@ -49,7 +48,7 @@ void VertexLoader::Setup(const Pica::Regs& regs) { if (attribute_index < 12) { int element_size = attribute_config.GetElementSizeInBytes(attribute_index); offset = Common::AlignUp(offset, element_size); - vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset; + vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; vertex_attribute_strides[attribute_index] = static_cast(loader_config.byte_count); vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); @@ -66,7 +65,7 @@ void VertexLoader::Setup(const Pica::Regs& regs) { } } -void VertexLoader::LoadVertex(int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) { +void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) { for (int i = 0; i < num_total_attributes; ++i) { if (vertex_attribute_elements[i] != 0) { // Default attribute values set if array elements have < 4 components. 
This @@ -78,7 +77,7 @@ void VertexLoader::LoadVertex(int index, int vertex, Shader::InputVertex& input, // Load per-vertex data from the loader arrays for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; + u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; const u8* srcdata = Memory::GetPhysicalPointer(source_addr); if (g_debug_context && Pica::g_debug_context->recorder) { @@ -97,7 +96,7 @@ void VertexLoader::LoadVertex(int index, int vertex, Shader::InputVertex& input, LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", comp, i, vertex, index, base_address, - vertex_attribute_sources[i] - base_address, + vertex_attribute_sources[i], vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], input.attr[i][comp].ToFloat32()); } -- cgit v1.2.3 From 251f29dd7fa530997cd6d27a8db28c4a39efd127 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 28 Apr 2016 21:43:49 +0200 Subject: Optimize the vertex loader, nearly doubling its speed. 
--- src/video_core/vertex_loader.cpp | 85 +++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 31 deletions(-) (limited to 'src/video_core/vertex_loader.cpp') diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index c2630d9eb..38682d2be 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp @@ -46,13 +46,11 @@ void VertexLoader::Setup(const Pica::Regs& regs) { u32 attribute_index = loader_config.GetComponent(component); if (attribute_index < 12) { - int element_size = attribute_config.GetElementSizeInBytes(attribute_index); - offset = Common::AlignUp(offset, element_size); + offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index)); vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; vertex_attribute_strides[attribute_index] = static_cast(loader_config.byte_count); vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); - vertex_attribute_element_size[attribute_index] = element_size; offset += attribute_config.GetStride(attribute_index); } else if (attribute_index < 16) { // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively @@ -68,38 +66,63 @@ void VertexLoader::Setup(const Pica::Regs& regs) { void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) { for (int i = 0; i < num_total_attributes; ++i) { if (vertex_attribute_elements[i] != 0) { - // Default attribute values set if array elements have < 4 components. This - // is *not* carried over from the default attribute settings even if they're - // enabled for this attribute. 
- static const float24 zero = float24::FromFloat32(0.0f); - static const float24 one = float24::FromFloat32(1.0f); - input.attr[i] = Math::Vec4(zero, zero, zero, one); - // Load per-vertex data from the loader arrays - for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; - const u8* srcdata = Memory::GetPhysicalPointer(source_addr); - - if (g_debug_context && Pica::g_debug_context->recorder) { - memory_accesses.AddAccess(source_addr, - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 - : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); + u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; + + if (g_debug_context && Pica::g_debug_context->recorder) { + memory_accesses.AddAccess(source_addr, + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 + : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 
2 : 1); + } + + switch (vertex_attribute_formats[i]) { + case Regs::VertexAttributeFormat::BYTE: + { + const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); } + break; + } + case Regs::VertexAttributeFormat::UBYTE: + { + const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); + } + break; + } + case Regs::VertexAttributeFormat::SHORT: + { + const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); + } + break; + } + case Regs::VertexAttributeFormat::FLOAT: + { + const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); + } + break; + } + } - const float srcval = - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) : - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) : - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) : - *reinterpret_cast<const float*>(srcdata); - - input.attr[i][comp] = float24::FromFloat32(srcval); - LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", - comp, i, vertex, index, - base_address, - vertex_attribute_sources[i], - vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], - input.attr[i][comp].ToFloat32()); + // Default attribute values set if array elements have < 4 components. 
This + // is *not* carried over from the default attribute settings even if they're + // enabled for this attribute. + for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { + input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); } + + LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f", + vertex_attribute_elements[i], i, vertex, index, + base_address, + vertex_attribute_sources[i], + vertex_attribute_strides[i] * vertex, + input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); } else if (vertex_attribute_is_default[i]) { // Load the default attribute if we're configured to do so input.attr[i] = g_state.vs.default_attributes[i]; -- cgit v1.2.3 From a442ee07f4e3b0da974ac9f0899a14e85ddaec92 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 28 Apr 2016 22:30:01 +0200 Subject: Debugger fix --- src/video_core/vertex_loader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/vertex_loader.cpp') diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index 38682d2be..4784817f4 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp @@ -70,9 +70,9 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; if (g_debug_context && Pica::g_debug_context->recorder) { - memory_accesses.AddAccess(source_addr, + memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * ( (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 - : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); + : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 
2 : 1)); } switch (vertex_attribute_formats[i]) { -- cgit v1.2.3 From a86d7cacc1f56c6e8ff5f046ba7e2477e92d873f Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 29 Apr 2016 08:50:21 +0200 Subject: Move and rename the MemoryAccesses class to MemoryAccessTracker. --- src/video_core/vertex_loader.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/video_core/vertex_loader.cpp') diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index 4784817f4..8a3d91896 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp @@ -12,8 +12,7 @@ #include "core/memory.h" -#include "debug_utils/debug_utils.h" - +#include "video_core/debug_utils/debug_utils.h" #include "video_core/pica.h" #include "video_core/pica_state.h" #include "video_core/pica_types.h" @@ -63,7 +62,7 @@ void VertexLoader::Setup(const Pica::Regs& regs) { } } -void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) { +void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) { for (int i = 0; i < num_total_attributes; ++i) { if (vertex_attribute_elements[i] != 0) { // Load per-vertex data from the loader arrays -- cgit v1.2.3