diff options
| author | 2015-07-25 03:19:17 -0300 | |
|---|---|---|
| committer | 2015-08-04 23:41:47 -0300 | |
| commit | a96502edd394e5010334811e31ab9febc65ef0c9 (patch) | |
| tree | 11ec1efad7b48037427978de312472dde0f94f09 /src | |
| parent | Merge pull request #1019 from yuriks/msvc2015-workaround (diff) | |
| download | yuzu-a96502edd394e5010334811e31ab9febc65ef0c9.tar.gz yuzu-a96502edd394e5010334811e31ab9febc65ef0c9.tar.xz yuzu-a96502edd394e5010334811e31ab9febc65ef0c9.zip | |
Videocore: Implement simple vertex caching
This gives a ~2/3 reduction in the number of vertices that need to be
processed through the vertex loaders and the vertex shader, yielding a
good speedup.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/command_processor.cpp | 151 |
1 files changed, 89 insertions, 62 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 36c3b9947..243abe842 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -206,88 +206,115 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 206 | std::map<u32, u32> ranges; | 206 | std::map<u32, u32> ranges; |
| 207 | } memory_accesses; | 207 | } memory_accesses; |
| 208 | 208 | ||
| 209 | // Simple circular-replacement vertex cache | ||
| 210 | // The size has been tuned for optimal balance between hit-rate and the cost of lookup | ||
| 211 | const size_t VERTEX_CACHE_SIZE = 32; | ||
| 212 | std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; | ||
| 213 | std::array<VertexShader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache; | ||
| 214 | |||
| 215 | unsigned int vertex_cache_pos = 0; | ||
| 216 | vertex_cache_ids.fill(-1); | ||
| 217 | |||
| 209 | for (unsigned int index = 0; index < regs.num_vertices; ++index) | 218 | for (unsigned int index = 0; index < regs.num_vertices; ++index) |
| 210 | { | 219 | { |
| 211 | unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; | 220 | unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; |
| 212 | 221 | ||
| 222 | // -1 is a common special value used for primitive restart. Since it's unknown if | ||
| 223 | // the PICA supports it, and it would mess up the caching, guard against it here. | ||
| 224 | ASSERT(vertex != -1); | ||
| 225 | |||
| 226 | bool vertex_cache_hit = false; | ||
| 227 | VertexShader::OutputVertex output; | ||
| 228 | |||
| 213 | if (is_indexed) { | 229 | if (is_indexed) { |
| 214 | // TODO: Implement some sort of vertex cache! | ||
| 215 | if (g_debug_context && Pica::g_debug_context->recorder) { | 230 | if (g_debug_context && Pica::g_debug_context->recorder) { |
| 216 | int size = index_u16 ? 2 : 1; | 231 | int size = index_u16 ? 2 : 1; |
| 217 | memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); | 232 | memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); |
| 218 | } | 233 | } |
| 234 | |||
| 235 | for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { | ||
| 236 | if (vertex == vertex_cache_ids[i]) { | ||
| 237 | output = vertex_cache[i]; | ||
| 238 | vertex_cache_hit = true; | ||
| 239 | break; | ||
| 240 | } | ||
| 241 | } | ||
| 219 | } | 242 | } |
| 220 | 243 | ||
| 221 | // Initialize data for the current vertex | 244 | if (!vertex_cache_hit) { |
| 222 | VertexShader::InputVertex input; | 245 | // Initialize data for the current vertex |
| 223 | 246 | VertexShader::InputVertex input; | |
| 224 | for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { | 247 | |
| 225 | if (vertex_attribute_elements[i] != 0) { | 248 | for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { |
| 226 | // Default attribute values set if array elements have < 4 components. This | 249 | if (vertex_attribute_elements[i] != 0) { |
| 227 | // is *not* carried over from the default attribute settings even if they're | 250 | // Default attribute values set if array elements have < 4 components. This |
| 228 | // enabled for this attribute. | 251 | // is *not* carried over from the default attribute settings even if they're |
| 229 | static const float24 zero = float24::FromFloat32(0.0f); | 252 | // enabled for this attribute. |
| 230 | static const float24 one = float24::FromFloat32(1.0f); | 253 | static const float24 zero = float24::FromFloat32(0.0f); |
| 231 | input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one); | 254 | static const float24 one = float24::FromFloat32(1.0f); |
| 232 | 255 | input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one); | |
| 233 | // Load per-vertex data from the loader arrays | 256 | |
| 234 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | 257 | // Load per-vertex data from the loader arrays |
| 235 | u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; | 258 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |
| 236 | const u8* srcdata = Memory::GetPhysicalPointer(source_addr); | 259 | u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; |
| 237 | 260 | const u8* srcdata = Memory::GetPhysicalPointer(source_addr); | |
| 238 | if (g_debug_context && Pica::g_debug_context->recorder) { | 261 | |
| 239 | memory_accesses.AddAccess(source_addr, | 262 | if (g_debug_context && Pica::g_debug_context->recorder) { |
| 240 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 | 263 | memory_accesses.AddAccess(source_addr, |
| 241 | : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); | 264 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 |
| 265 | : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); | ||
| 266 | } | ||
| 267 | |||
| 268 | const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata : | ||
| 269 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata : | ||
| 270 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata : | ||
| 271 | *(float*)srcdata; | ||
| 272 | |||
| 273 | input.attr[i][comp] = float24::FromFloat32(srcval); | ||
| 274 | LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f", | ||
| 275 | comp, i, vertex, index, | ||
| 276 | attribute_config.GetPhysicalBaseAddress(), | ||
| 277 | vertex_attribute_sources[i] - base_address, | ||
| 278 | vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], | ||
| 279 | input.attr[i][comp].ToFloat32()); | ||
| 242 | } | 280 | } |
| 243 | 281 | } else if (attribute_config.IsDefaultAttribute(i)) { | |
| 244 | const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata : | 282 | // Load the default attribute if we're configured to do so |
| 245 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata : | 283 | input.attr[i] = g_state.vs.default_attributes[i]; |
| 246 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata : | 284 | LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", |
| 247 | *(float*)srcdata; | 285 | i, vertex, index, |
| 248 | 286 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | |
| 249 | input.attr[i][comp] = float24::FromFloat32(srcval); | 287 | input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); |
| 250 | LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f", | 288 | } else { |
| 251 | comp, i, vertex, index, | 289 | // TODO(yuriks): In this case, no data gets loaded and the vertex |
| 252 | attribute_config.GetPhysicalBaseAddress(), | 290 | // remains with the last value it had. This isn't currently maintained |
| 253 | vertex_attribute_sources[i] - base_address, | 291 | // as global state, however, and so won't work in Citra yet. |
| 254 | vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], | ||
| 255 | input.attr[i][comp].ToFloat32()); | ||
| 256 | } | 292 | } |
| 257 | } else if (attribute_config.IsDefaultAttribute(i)) { | ||
| 258 | // Load the default attribute if we're configured to do so | ||
| 259 | input.attr[i] = g_state.vs.default_attributes[i]; | ||
| 260 | LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", | ||
| 261 | i, vertex, index, | ||
| 262 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | ||
| 263 | input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | ||
| 264 | } else { | ||
| 265 | // TODO(yuriks): In this case, no data gets loaded and the vertex remains | ||
| 266 | // with the last value it had. This isn't currently maintained | ||
| 267 | // as global state, however, and so won't work in Cita yet. | ||
| 268 | } | 293 | } |
| 269 | } | ||
| 270 | 294 | ||
| 271 | if (g_debug_context) | 295 | if (g_debug_context) |
| 272 | g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); | 296 | g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); |
| 273 | 297 | ||
| 274 | #if PICA_DUMP_GEOMETRY | 298 | #if PICA_DUMP_GEOMETRY |
| 275 | // NOTE: When dumping geometry, we simply assume that the first input attribute | 299 | // NOTE: When dumping geometry, we simply assume that the first input attribute |
| 276 | // corresponds to the position for now. | 300 | // corresponds to the position for now. |
| 277 | DebugUtils::GeometryDumper::Vertex dumped_vertex = { | 301 | DebugUtils::GeometryDumper::Vertex dumped_vertex = { |
| 278 | input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32() | 302 | input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32() |
| 279 | }; | 303 | }; |
| 280 | using namespace std::placeholders; | 304 | using namespace std::placeholders; |
| 281 | dumping_primitive_assembler.SubmitVertex(dumped_vertex, | 305 | dumping_primitive_assembler.SubmitVertex(dumped_vertex, |
| 282 | std::bind(&DebugUtils::GeometryDumper::AddTriangle, | 306 | std::bind(&DebugUtils::GeometryDumper::AddTriangle, |
| 283 | &geometry_dumper, _1, _2, _3)); | 307 | &geometry_dumper, _1, _2, _3)); |
| 284 | #endif | 308 | #endif |
| 285 | 309 | ||
| 286 | // Send to vertex shader | 310 | // Send to vertex shader |
| 287 | VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs, g_state.vs); | 311 | output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs, g_state.vs); |
| 288 | 312 | ||
| 289 | if (is_indexed) { | 313 | if (is_indexed) { |
| 290 | // TODO: Add processed vertex to vertex cache! | 314 | vertex_cache[vertex_cache_pos] = output; |
| 315 | vertex_cache_ids[vertex_cache_pos] = vertex; | ||
| 316 | vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; | ||
| 317 | } | ||
| 291 | } | 318 | } |
| 292 | 319 | ||
| 293 | if (Settings::values.use_hw_renderer) { | 320 | if (Settings::values.use_hw_renderer) { |