diff options
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 120 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 10 |
2 files changed, 107 insertions, 23 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e1dc32e17..f04538dca 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -336,6 +336,7 @@ private: | |||
| 336 | std::vector<BufferId> cached_write_buffer_ids; | 336 | std::vector<BufferId> cached_write_buffer_ids; |
| 337 | 337 | ||
| 338 | IntervalSet uncommitted_ranges; | 338 | IntervalSet uncommitted_ranges; |
| 339 | IntervalSet common_ranges; | ||
| 339 | std::deque<IntervalSet> committed_ranges; | 340 | std::deque<IntervalSet> committed_ranges; |
| 340 | 341 | ||
| 341 | size_t immediate_buffer_capacity = 0; | 342 | size_t immediate_buffer_capacity = 0; |
| @@ -359,6 +360,7 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 359 | // Ensure the first slot is used for the null buffer | 360 | // Ensure the first slot is used for the null buffer |
| 360 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 361 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 361 | deletion_iterator = slot_buffers.end(); | 362 | deletion_iterator = slot_buffers.end(); |
| 363 | common_ranges.clear(); | ||
| 362 | } | 364 | } |
| 363 | 365 | ||
| 364 | template <class P> | 366 | template <class P> |
| @@ -592,19 +594,56 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 592 | const VAddr cpu_addr_end = interval.upper(); | 594 | const VAddr cpu_addr_end = interval.upper(); |
| 593 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 595 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 594 | boost::container::small_vector<BufferCopy, 1> copies; | 596 | boost::container::small_vector<BufferCopy, 1> copies; |
| 595 | buffer.ForEachDownloadRange(cpu_addr, size, true, | 597 | buffer.ForEachDownloadRange( |
| 596 | [&](u64 range_offset, u64 range_size) { | 598 | cpu_addr, size, true, [&](u64 range_offset, u64 range_size) { |
| 597 | downloads.push_back({ | 599 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 598 | BufferCopy{ | 600 | const auto add_download = [&](VAddr start, VAddr end) { |
| 599 | .src_offset = range_offset, | 601 | const u64 new_offset = start - buffer_addr; |
| 600 | .dst_offset = total_size_bytes, | 602 | const u64 new_size = end - start; |
| 601 | .size = range_size, | 603 | downloads.push_back({ |
| 602 | }, | 604 | BufferCopy{ |
| 603 | buffer_id, | 605 | .src_offset = new_offset, |
| 604 | }); | 606 | .dst_offset = total_size_bytes, |
| 605 | total_size_bytes += range_size; | 607 | .size = new_size, |
| 606 | largest_copy = std::max(largest_copy, range_size); | 608 | }, |
| 607 | }); | 609 | buffer_id, |
| 610 | }); | ||
| 611 | // Align up to avoid cache conflicts | ||
| 612 | constexpr u64 align = 256ULL; | ||
| 613 | constexpr u64 mask = ~(align - 1ULL); | ||
| 614 | total_size_bytes += (new_size + align - 1) & mask; | ||
| 615 | largest_copy = std::max(largest_copy, new_size); | ||
| 616 | }; | ||
| 617 | |||
| 618 | const VAddr start_address = buffer_addr + range_offset; | ||
| 619 | const VAddr end_address = start_address + range_size; | ||
| 620 | const IntervalType search_interval{cpu_addr, 1}; | ||
| 621 | auto it = common_ranges.lower_bound(search_interval); | ||
| 622 | if (it == common_ranges.end()) { | ||
| 623 | it = common_ranges.begin(); | ||
| 624 | } | ||
| 625 | while (it != common_ranges.end()) { | ||
| 626 | VAddr inter_addr_end = it->upper(); | ||
| 627 | VAddr inter_addr = it->lower(); | ||
| 628 | if (inter_addr >= end_address) { | ||
| 629 | break; | ||
| 630 | } | ||
| 631 | if (inter_addr_end <= start_address) { | ||
| 632 | it++; | ||
| 633 | continue; | ||
| 634 | } | ||
| 635 | if (inter_addr_end > end_address) { | ||
| 636 | inter_addr_end = end_address; | ||
| 637 | } | ||
| 638 | if (inter_addr < start_address) { | ||
| 639 | inter_addr = start_address; | ||
| 640 | } | ||
| 641 | add_download(inter_addr, inter_addr_end); | ||
| 642 | it++; | ||
| 643 | } | ||
| 644 | const IntervalType subtract_interval{start_address, end_address}; | ||
| 645 | common_ranges.subtract(subtract_interval); | ||
| 646 | }); | ||
| 608 | }); | 647 | }); |
| 609 | } | 648 | } |
| 610 | } | 649 | } |
| @@ -1060,13 +1099,15 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s | |||
| 1060 | Buffer& buffer = slot_buffers[buffer_id]; | 1099 | Buffer& buffer = slot_buffers[buffer_id]; |
| 1061 | buffer.MarkRegionAsGpuModified(cpu_addr, size); | 1100 | buffer.MarkRegionAsGpuModified(cpu_addr, size); |
| 1062 | 1101 | ||
| 1102 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; | ||
| 1103 | common_ranges.add(base_interval); | ||
| 1104 | |||
| 1063 | const bool is_accuracy_high = | 1105 | const bool is_accuracy_high = |
| 1064 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; | 1106 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; |
| 1065 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | 1107 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); |
| 1066 | if (!is_async && !is_accuracy_high) { | 1108 | if (!is_async && !is_accuracy_high) { |
| 1067 | return; | 1109 | return; |
| 1068 | } | 1110 | } |
| 1069 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; | ||
| 1070 | uncommitted_ranges.add(base_interval); | 1111 | uncommitted_ranges.add(base_interval); |
| 1071 | } | 1112 | } |
| 1072 | 1113 | ||
| @@ -1292,13 +1333,50 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1292 | u64 total_size_bytes = 0; | 1333 | u64 total_size_bytes = 0; |
| 1293 | u64 largest_copy = 0; | 1334 | u64 largest_copy = 0; |
| 1294 | buffer.ForEachDownloadRange(cpu_addr, size, true, [&](u64 range_offset, u64 range_size) { | 1335 | buffer.ForEachDownloadRange(cpu_addr, size, true, [&](u64 range_offset, u64 range_size) { |
| 1295 | copies.push_back(BufferCopy{ | 1336 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 1296 | .src_offset = range_offset, | 1337 | const auto add_download = [&](VAddr start, VAddr end) { |
| 1297 | .dst_offset = total_size_bytes, | 1338 | const u64 new_offset = start - buffer_addr; |
| 1298 | .size = range_size, | 1339 | const u64 new_size = end - start; |
| 1299 | }); | 1340 | copies.push_back(BufferCopy{ |
| 1300 | total_size_bytes += range_size; | 1341 | .src_offset = new_offset, |
| 1301 | largest_copy = std::max(largest_copy, range_size); | 1342 | .dst_offset = total_size_bytes, |
| 1343 | .size = new_size, | ||
| 1344 | }); | ||
| 1345 | // Align up to avoid cache conflicts | ||
| 1346 | constexpr u64 align = 256ULL; | ||
| 1347 | constexpr u64 mask = ~(align - 1ULL); | ||
| 1348 | total_size_bytes += (new_size + align - 1) & mask; | ||
| 1349 | largest_copy = std::max(largest_copy, new_size); | ||
| 1350 | }; | ||
| 1351 | |||
| 1352 | const VAddr start_address = buffer_addr + range_offset; | ||
| 1353 | const VAddr end_address = start_address + range_size; | ||
| 1354 | const IntervalType search_interval{start_address - range_size, 1}; | ||
| 1355 | auto it = common_ranges.lower_bound(search_interval); | ||
| 1356 | if (it == common_ranges.end()) { | ||
| 1357 | it = common_ranges.begin(); | ||
| 1358 | } | ||
| 1359 | while (it != common_ranges.end()) { | ||
| 1360 | VAddr inter_addr_end = it->upper(); | ||
| 1361 | VAddr inter_addr = it->lower(); | ||
| 1362 | if (inter_addr >= end_address) { | ||
| 1363 | break; | ||
| 1364 | } | ||
| 1365 | if (inter_addr_end <= start_address) { | ||
| 1366 | it++; | ||
| 1367 | continue; | ||
| 1368 | } | ||
| 1369 | if (inter_addr_end > end_address) { | ||
| 1370 | inter_addr_end = end_address; | ||
| 1371 | } | ||
| 1372 | if (inter_addr < start_address) { | ||
| 1373 | inter_addr = start_address; | ||
| 1374 | } | ||
| 1375 | add_download(inter_addr, inter_addr_end); | ||
| 1376 | it++; | ||
| 1377 | } | ||
| 1378 | const IntervalType subtract_interval{start_address, end_address}; | ||
| 1379 | common_ranges.subtract(subtract_interval); | ||
| 1302 | }); | 1380 | }); |
| 1303 | if (total_size_bytes == 0) { | 1381 | if (total_size_bytes == 0) { |
| 1304 | return; | 1382 | return; |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 8b33c04ab..8d28bd884 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/cityhash.h" | 5 | #include "common/cityhash.h" |
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "common/settings.h" | ||
| 7 | #include "core/core.h" | 8 | #include "core/core.h" |
| 8 | #include "core/memory.h" | 9 | #include "core/memory.h" |
| 9 | #include "video_core/dma_pusher.h" | 10 | #include "video_core/dma_pusher.h" |
| @@ -76,8 +77,13 @@ bool DmaPusher::Step() { | |||
| 76 | 77 | ||
| 77 | // Push buffer non-empty, read a word | 78 | // Push buffer non-empty, read a word |
| 78 | command_headers.resize(command_list_header.size); | 79 | command_headers.resize(command_list_header.size); |
| 79 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), | 80 | if (Settings::IsGPULevelHigh()) { |
| 80 | command_list_header.size * sizeof(u32)); | 81 | gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(), |
| 82 | command_list_header.size * sizeof(u32)); | ||
| 83 | } else { | ||
| 84 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), | ||
| 85 | command_list_header.size * sizeof(u32)); | ||
| 86 | } | ||
| 81 | } | 87 | } |
| 82 | for (std::size_t index = 0; index < command_headers.size();) { | 88 | for (std::size_t index = 0; index < command_headers.size();) { |
| 83 | const CommandHeader& command_header = command_headers[index]; | 89 | const CommandHeader& command_header = command_headers[index]; |