diff options
| author | 2016-04-29 09:42:47 -0400 | |
|---|---|---|
| committer | 2016-04-29 09:42:47 -0400 | |
| commit | 90243c56fb90d7d74cbef40da3eec97d967c10a2 (patch) | |
| tree | 94d223001196ca9b774a8d018535ba2be8de1b01 /src | |
| parent | Common: Remove section measurement from profiler (#1731) (diff) | |
| parent | Move and rename the MemoryAccesses class to MemoryAccessTracker. (diff) | |
| download | yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.gz yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.xz yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.zip | |
Merge pull request #1730 from hrydgard/vertex-loader
* Remove late accesses to attribute_config
* Refactor: Extract VertexLoader from command_processor.cpp.
Preparation for a similar concept to Dolphin or PPSSPP. These can be JIT-ed and cached.
* Move "&" to its proper place, add missing includes, and make some include paths properly relative.
* Don't keep base_address in the loader; it doesn't belong there (with it, the loader can't be cached).
* Optimize the vertex loader, nearly doubling its speed.
* Debugger fix
* Move and rename the MemoryAccesses class to MemoryAccessTracker.
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 129 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.h | 30 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 2 | ||||
| -rw-r--r-- | src/video_core/vertex_loader.cpp | 140 | ||||
| -rw-r--r-- | src/video_core/vertex_loader.h | 28 |
6 files changed, 210 insertions, 121 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 76cfd4f7d..de4082b1f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -16,6 +16,7 @@ set(SRCS | |||
| 16 | shader/shader_interpreter.cpp | 16 | shader/shader_interpreter.cpp |
| 17 | swrasterizer.cpp | 17 | swrasterizer.cpp |
| 18 | utils.cpp | 18 | utils.cpp |
| 19 | vertex_loader.cpp | ||
| 19 | video_core.cpp | 20 | video_core.cpp |
| 20 | ) | 21 | ) |
| 21 | 22 | ||
| @@ -43,6 +44,7 @@ set(HEADERS | |||
| 43 | shader/shader_interpreter.h | 44 | shader/shader_interpreter.h |
| 44 | swrasterizer.h | 45 | swrasterizer.h |
| 45 | utils.h | 46 | utils.h |
| 47 | vertex_loader.h | ||
| 46 | video_core.h | 48 | video_core.h |
| 47 | ) | 49 | ) |
| 48 | 50 | ||
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 97ba8214e..58883e374 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "video_core/video_core.h" | 21 | #include "video_core/video_core.h" |
| 22 | #include "video_core/debug_utils/debug_utils.h" | 22 | #include "video_core/debug_utils/debug_utils.h" |
| 23 | #include "video_core/shader/shader_interpreter.h" | 23 | #include "video_core/shader/shader_interpreter.h" |
| 24 | #include "video_core/vertex_loader.h" | ||
| 24 | 25 | ||
| 25 | namespace Pica { | 26 | namespace Pica { |
| 26 | 27 | ||
| @@ -188,54 +189,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 188 | #if PICA_LOG_TEV | 189 | #if PICA_LOG_TEV |
| 189 | DebugUtils::DumpTevStageConfig(regs.GetTevStages()); | 190 | DebugUtils::DumpTevStageConfig(regs.GetTevStages()); |
| 190 | #endif | 191 | #endif |
| 191 | |||
| 192 | if (g_debug_context) | 192 | if (g_debug_context) |
| 193 | g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); | 193 | g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); |
| 194 | 194 | ||
| 195 | const auto& attribute_config = regs.vertex_attributes; | 195 | // Processes information about internal vertex attributes to figure out how a vertex is loaded. |
| 196 | const u32 base_address = attribute_config.GetPhysicalBaseAddress(); | 196 | // Later, these can be compiled and cached. |
| 197 | 197 | VertexLoader loader; | |
| 198 | // Information about internal vertex attributes | 198 | const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress(); |
| 199 | u32 vertex_attribute_sources[16]; | 199 | loader.Setup(regs); |
| 200 | boost::fill(vertex_attribute_sources, 0xdeadbeef); | ||
| 201 | u32 vertex_attribute_strides[16] = {}; | ||
| 202 | Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; | ||
| 203 | |||
| 204 | u32 vertex_attribute_elements[16] = {}; | ||
| 205 | u32 vertex_attribute_element_size[16] = {}; | ||
| 206 | |||
| 207 | // Setup attribute data from loaders | ||
| 208 | for (int loader = 0; loader < 12; ++loader) { | ||
| 209 | const auto& loader_config = attribute_config.attribute_loaders[loader]; | ||
| 210 | |||
| 211 | u32 offset = 0; | ||
| 212 | |||
| 213 | // TODO: What happens if a loader overwrites a previous one's data? | ||
| 214 | for (unsigned component = 0; component < loader_config.component_count; ++component) { | ||
| 215 | if (component >= 12) { | ||
| 216 | LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); | ||
| 217 | continue; | ||
| 218 | } | ||
| 219 | |||
| 220 | u32 attribute_index = loader_config.GetComponent(component); | ||
| 221 | if (attribute_index < 12) { | ||
| 222 | int element_size = attribute_config.GetElementSizeInBytes(attribute_index); | ||
| 223 | offset = Common::AlignUp(offset, element_size); | ||
| 224 | vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset; | ||
| 225 | vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); | ||
| 226 | vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); | ||
| 227 | vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); | ||
| 228 | vertex_attribute_element_size[attribute_index] = element_size; | ||
| 229 | offset += attribute_config.GetStride(attribute_index); | ||
| 230 | } else if (attribute_index < 16) { | ||
| 231 | // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively | ||
| 232 | offset = Common::AlignUp(offset, 4); | ||
| 233 | offset += (attribute_index - 11) * 4; | ||
| 234 | } else { | ||
| 235 | UNREACHABLE(); // This is truly unreachable due to the number of bits for each component | ||
| 236 | } | ||
| 237 | } | ||
| 238 | } | ||
| 239 | 200 | ||
| 240 | // Load vertices | 201 | // Load vertices |
| 241 | bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); | 202 | bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); |
| @@ -259,32 +220,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 259 | } | 220 | } |
| 260 | } | 221 | } |
| 261 | 222 | ||
| 262 | class { | 223 | DebugUtils::MemoryAccessTracker memory_accesses; |
| 263 | /// Combine overlapping and close ranges | ||
| 264 | void SimplifyRanges() { | ||
| 265 | for (auto it = ranges.begin(); it != ranges.end(); ++it) { | ||
| 266 | // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too | ||
| 267 | auto it2 = std::next(it); | ||
| 268 | while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { | ||
| 269 | it->second = std::max(it->second, it2->first + it2->second - it->first); | ||
| 270 | it2 = ranges.erase(it2); | ||
| 271 | } | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | public: | ||
| 276 | /// Record a particular memory access in the list | ||
| 277 | void AddAccess(u32 paddr, u32 size) { | ||
| 278 | // Create new range or extend existing one | ||
| 279 | ranges[paddr] = std::max(ranges[paddr], size); | ||
| 280 | |||
| 281 | // Simplify ranges... | ||
| 282 | SimplifyRanges(); | ||
| 283 | } | ||
| 284 | |||
| 285 | /// Map of accessed ranges (mapping start address to range size) | ||
| 286 | std::map<u32, u32> ranges; | ||
| 287 | } memory_accesses; | ||
| 288 | 224 | ||
| 289 | // Simple circular-replacement vertex cache | 225 | // Simple circular-replacement vertex cache |
| 290 | // The size has been tuned for optimal balance between hit-rate and the cost of lookup | 226 | // The size has been tuned for optimal balance between hit-rate and the cost of lookup |
| @@ -328,60 +264,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 328 | if (!vertex_cache_hit) { | 264 | if (!vertex_cache_hit) { |
| 329 | // Initialize data for the current vertex | 265 | // Initialize data for the current vertex |
| 330 | Shader::InputVertex input; | 266 | Shader::InputVertex input; |
| 331 | 267 | loader.LoadVertex(base_address, index, vertex, input, memory_accesses); | |
| 332 | for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { | ||
| 333 | if (vertex_attribute_elements[i] != 0) { | ||
| 334 | // Default attribute values set if array elements have < 4 components. This | ||
| 335 | // is *not* carried over from the default attribute settings even if they're | ||
| 336 | // enabled for this attribute. | ||
| 337 | static const float24 zero = float24::FromFloat32(0.0f); | ||
| 338 | static const float24 one = float24::FromFloat32(1.0f); | ||
| 339 | input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one); | ||
| 340 | |||
| 341 | // Load per-vertex data from the loader arrays | ||
| 342 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||
| 343 | u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; | ||
| 344 | const u8* srcdata = Memory::GetPhysicalPointer(source_addr); | ||
| 345 | |||
| 346 | if (g_debug_context && Pica::g_debug_context->recorder) { | ||
| 347 | memory_accesses.AddAccess(source_addr, | ||
| 348 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 | ||
| 349 | : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); | ||
| 350 | } | ||
| 351 | |||
| 352 | const float srcval = | ||
| 353 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) : | ||
| 354 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) : | ||
| 355 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) : | ||
| 356 | *reinterpret_cast<const float*>(srcdata); | ||
| 357 | |||
| 358 | input.attr[i][comp] = float24::FromFloat32(srcval); | ||
| 359 | LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", | ||
| 360 | comp, i, vertex, index, | ||
| 361 | attribute_config.GetPhysicalBaseAddress(), | ||
| 362 | vertex_attribute_sources[i] - base_address, | ||
| 363 | vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], | ||
| 364 | input.attr[i][comp].ToFloat32()); | ||
| 365 | } | ||
| 366 | } else if (attribute_config.IsDefaultAttribute(i)) { | ||
| 367 | // Load the default attribute if we're configured to do so | ||
| 368 | input.attr[i] = g_state.vs.default_attributes[i]; | ||
| 369 | LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", | ||
| 370 | i, vertex, index, | ||
| 371 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | ||
| 372 | input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | ||
| 373 | } else { | ||
| 374 | // TODO(yuriks): In this case, no data gets loaded and the vertex | ||
| 375 | // remains with the last value it had. This isn't currently maintained | ||
| 376 | // as global state, however, and so won't work in Citra yet. | ||
| 377 | } | ||
| 378 | } | ||
| 379 | 268 | ||
| 380 | if (g_debug_context) | 269 | if (g_debug_context) |
| 381 | g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); | 270 | g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); |
| 382 | 271 | ||
| 383 | // Send to vertex shader | 272 | // Send to vertex shader |
| 384 | output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes()); | 273 | output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes()); |
| 385 | 274 | ||
| 386 | if (is_indexed) { | 275 | if (is_indexed) { |
| 387 | vertex_cache[vertex_cache_pos] = output; | 276 | vertex_cache[vertex_cache_pos] = output; |
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 56f9bd958..dd0828cee 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h | |||
| @@ -216,6 +216,36 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); | |||
| 216 | 216 | ||
| 217 | void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); | 217 | void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); |
| 218 | 218 | ||
| 219 | /** | ||
| 220 | * Used in the vertex loader to merge access records. TODO: Investigate if actually useful. | ||
| 221 | */ | ||
| 222 | class MemoryAccessTracker { | ||
| 223 | /// Combine overlapping and close ranges | ||
| 224 | void SimplifyRanges() { | ||
| 225 | for (auto it = ranges.begin(); it != ranges.end(); ++it) { | ||
| 226 | // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too | ||
| 227 | auto it2 = std::next(it); | ||
| 228 | while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { | ||
| 229 | it->second = std::max(it->second, it2->first + it2->second - it->first); | ||
| 230 | it2 = ranges.erase(it2); | ||
| 231 | } | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | public: | ||
| 236 | /// Record a particular memory access in the list | ||
| 237 | void AddAccess(u32 paddr, u32 size) { | ||
| 238 | // Create new range or extend existing one | ||
| 239 | ranges[paddr] = std::max(ranges[paddr], size); | ||
| 240 | |||
| 241 | // Simplify ranges... | ||
| 242 | SimplifyRanges(); | ||
| 243 | } | ||
| 244 | |||
| 245 | /// Map of accessed ranges (mapping start address to range size) | ||
| 246 | std::map<u32, u32> ranges; | ||
| 247 | }; | ||
| 248 | |||
| 219 | } // namespace | 249 | } // namespace |
| 220 | 250 | ||
| 221 | } // namespace | 251 | } // namespace |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 9c5bd97bd..9ce9344d2 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -25,7 +25,7 @@ namespace Pica { | |||
| 25 | namespace Shader { | 25 | namespace Shader { |
| 26 | 26 | ||
| 27 | struct InputVertex { | 27 | struct InputVertex { |
| 28 | Math::Vec4<float24> attr[16]; | 28 | alignas(16) Math::Vec4<float24> attr[16]; |
| 29 | }; | 29 | }; |
| 30 | 30 | ||
| 31 | struct OutputVertex { | 31 | struct OutputVertex { |
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp new file mode 100644 index 000000000..8a3d91896 --- /dev/null +++ b/src/video_core/vertex_loader.cpp | |||
| @@ -0,0 +1,140 @@ | |||
| 1 | #include <cmath> | ||
| 2 | #include <string> | ||
| 3 | |||
| 4 | #include "boost/range/algorithm/fill.hpp" | ||
| 5 | |||
| 6 | #include "common/assert.h" | ||
| 7 | #include "common/alignment.h" | ||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_funcs.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/logging/log.h" | ||
| 12 | |||
| 13 | #include "core/memory.h" | ||
| 14 | |||
| 15 | #include "video_core/debug_utils/debug_utils.h" | ||
| 16 | #include "video_core/pica.h" | ||
| 17 | #include "video_core/pica_state.h" | ||
| 18 | #include "video_core/pica_types.h" | ||
| 19 | #include "video_core/vertex_loader.h" | ||
| 20 | |||
| 21 | namespace Pica { | ||
| 22 | |||
| 23 | void VertexLoader::Setup(const Pica::Regs& regs) { | ||
| 24 | const auto& attribute_config = regs.vertex_attributes; | ||
| 25 | num_total_attributes = attribute_config.GetNumTotalAttributes(); | ||
| 26 | |||
| 27 | boost::fill(vertex_attribute_sources, 0xdeadbeef); | ||
| 28 | |||
| 29 | for (int i = 0; i < 16; i++) { | ||
| 30 | vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i); | ||
| 31 | } | ||
| 32 | |||
| 33 | // Setup attribute data from loaders | ||
| 34 | for (int loader = 0; loader < 12; ++loader) { | ||
| 35 | const auto& loader_config = attribute_config.attribute_loaders[loader]; | ||
| 36 | |||
| 37 | u32 offset = 0; | ||
| 38 | |||
| 39 | // TODO: What happens if a loader overwrites a previous one's data? | ||
| 40 | for (unsigned component = 0; component < loader_config.component_count; ++component) { | ||
| 41 | if (component >= 12) { | ||
| 42 | LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); | ||
| 43 | continue; | ||
| 44 | } | ||
| 45 | |||
| 46 | u32 attribute_index = loader_config.GetComponent(component); | ||
| 47 | if (attribute_index < 12) { | ||
| 48 | offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index)); | ||
| 49 | vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; | ||
| 50 | vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); | ||
| 51 | vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); | ||
| 52 | vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); | ||
| 53 | offset += attribute_config.GetStride(attribute_index); | ||
| 54 | } else if (attribute_index < 16) { | ||
| 55 | // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively | ||
| 56 | offset = Common::AlignUp(offset, 4); | ||
| 57 | offset += (attribute_index - 11) * 4; | ||
| 58 | } else { | ||
| 59 | UNREACHABLE(); // This is truly unreachable due to the number of bits for each component | ||
| 60 | } | ||
| 61 | } | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) { | ||
| 66 | for (int i = 0; i < num_total_attributes; ++i) { | ||
| 67 | if (vertex_attribute_elements[i] != 0) { | ||
| 68 | // Load per-vertex data from the loader arrays | ||
| 69 | u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; | ||
| 70 | |||
| 71 | if (g_debug_context && Pica::g_debug_context->recorder) { | ||
| 72 | memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * ( | ||
| 73 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 | ||
| 74 | : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1)); | ||
| 75 | } | ||
| 76 | |||
| 77 | switch (vertex_attribute_formats[i]) { | ||
| 78 | case Regs::VertexAttributeFormat::BYTE: | ||
| 79 | { | ||
| 80 | const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); | ||
| 81 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||
| 82 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||
| 83 | } | ||
| 84 | break; | ||
| 85 | } | ||
| 86 | case Regs::VertexAttributeFormat::UBYTE: | ||
| 87 | { | ||
| 88 | const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); | ||
| 89 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||
| 90 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||
| 91 | } | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | case Regs::VertexAttributeFormat::SHORT: | ||
| 95 | { | ||
| 96 | const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); | ||
| 97 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||
| 98 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||
| 99 | } | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | case Regs::VertexAttributeFormat::FLOAT: | ||
| 103 | { | ||
| 104 | const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); | ||
| 105 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||
| 106 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||
| 107 | } | ||
| 108 | break; | ||
| 109 | } | ||
| 110 | } | ||
| 111 | |||
| 112 | // Default attribute values set if array elements have < 4 components. This | ||
| 113 | // is *not* carried over from the default attribute settings even if they're | ||
| 114 | // enabled for this attribute. | ||
| 115 | for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { | ||
| 116 | input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | ||
| 117 | } | ||
| 118 | |||
| 119 | LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f", | ||
| 120 | vertex_attribute_elements[i], i, vertex, index, | ||
| 121 | base_address, | ||
| 122 | vertex_attribute_sources[i], | ||
| 123 | vertex_attribute_strides[i] * vertex, | ||
| 124 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | ||
| 125 | } else if (vertex_attribute_is_default[i]) { | ||
| 126 | // Load the default attribute if we're configured to do so | ||
| 127 | input.attr[i] = g_state.vs.default_attributes[i]; | ||
| 128 | LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", | ||
| 129 | i, vertex, index, | ||
| 130 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | ||
| 131 | input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | ||
| 132 | } else { | ||
| 133 | // TODO(yuriks): In this case, no data gets loaded and the vertex | ||
| 134 | // remains with the last value it had. This isn't currently maintained | ||
| 135 | // as global state, however, and so won't work in Citra yet. | ||
| 136 | } | ||
| 137 | } | ||
| 138 | } | ||
| 139 | |||
| 140 | } // namespace Pica \ No newline at end of file | ||
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h new file mode 100644 index 000000000..ff42d1596 --- /dev/null +++ b/src/video_core/vertex_loader.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <iterator> | ||
| 4 | #include <algorithm> | ||
| 5 | |||
| 6 | #include "video_core/pica.h" | ||
| 7 | #include "video_core/shader/shader.h" | ||
| 8 | #include "video_core/debug_utils/debug_utils.h" | ||
| 9 | |||
| 10 | namespace Pica { | ||
| 11 | |||
| 12 | class VertexLoader { | ||
| 13 | public: | ||
| 14 | void Setup(const Pica::Regs& regs); | ||
| 15 | void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses); | ||
| 16 | |||
| 17 | int GetNumTotalAttributes() const { return num_total_attributes; } | ||
| 18 | |||
| 19 | private: | ||
| 20 | u32 vertex_attribute_sources[16]; | ||
| 21 | u32 vertex_attribute_strides[16] = {}; | ||
| 22 | Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; | ||
| 23 | u32 vertex_attribute_elements[16] = {}; | ||
| 24 | bool vertex_attribute_is_default[16]; | ||
| 25 | int num_total_attributes; | ||
| 26 | }; | ||
| 27 | |||
| 28 | } // namespace Pica | ||