summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h    | 137
-rw-r--r--  src/video_core/rasterizer_interface.h         |   4
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h |   9
3 files changed, 62 insertions, 88 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index c73ebb1f4..2871682f6 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -202,6 +202,36 @@ private:
202 } 202 }
203 } 203 }
204 204
205 template <typename Func>
206 void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) {
207 const VAddr start_address = cpu_addr;
208 const VAddr end_address = start_address + size;
209 const VAddr search_base =
210 static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size)));
211 const IntervalType search_interval{search_base, search_base + 1};
212 auto it = common_ranges.lower_bound(search_interval);
213 if (it == common_ranges.end()) {
214 it = common_ranges.begin();
215 }
216 for (; it != common_ranges.end(); it++) {
217 VAddr inter_addr_end = it->upper();
218 VAddr inter_addr = it->lower();
219 if (inter_addr >= end_address) {
220 break;
221 }
222 if (inter_addr_end <= start_address) {
223 continue;
224 }
225 if (inter_addr_end > end_address) {
226 inter_addr_end = end_address;
227 }
228 if (inter_addr < start_address) {
229 inter_addr = start_address;
230 }
231 func(inter_addr, inter_addr_end);
232 }
233 }
234
205 static bool IsRangeGranular(VAddr cpu_addr, size_t size) { 235 static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
206 return (cpu_addr & ~Core::Memory::PAGE_MASK) == 236 return (cpu_addr & ~Core::Memory::PAGE_MASK) ==
207 ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); 237 ((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
@@ -441,12 +471,15 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
441 } 471 }
442 const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount); 472 const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount);
443 const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount); 473 const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount);
444 if (!(source_dirty || dest_dirty)) { 474 if (!source_dirty && !dest_dirty) {
445 return false; 475 return false;
446 } 476 }
447 477
448 const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; 478 const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
449 common_ranges.subtract(subtract_interval); 479 uncommitted_ranges.subtract(subtract_interval);
480 for (auto& interval_set : committed_ranges) {
481 interval_set.subtract(subtract_interval);
482 }
450 483
451 BufferId buffer_a; 484 BufferId buffer_a;
452 BufferId buffer_b; 485 BufferId buffer_b;
@@ -457,46 +490,28 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
457 } while (has_deleted_buffers); 490 } while (has_deleted_buffers);
458 auto& src_buffer = slot_buffers[buffer_a]; 491 auto& src_buffer = slot_buffers[buffer_a];
459 auto& dest_buffer = slot_buffers[buffer_b]; 492 auto& dest_buffer = slot_buffers[buffer_b];
460 SynchronizeBuffer(src_buffer, *cpu_src_address, amount); 493 SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
461 SynchronizeBuffer(dest_buffer, *cpu_dest_address, amount); 494 SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast<u32>(amount));
462 std::array copies{BufferCopy{ 495 std::array copies{BufferCopy{
463 .src_offset = src_buffer.Offset(*cpu_src_address), 496 .src_offset = src_buffer.Offset(*cpu_src_address),
464 .dst_offset = dest_buffer.Offset(*cpu_dest_address), 497 .dst_offset = dest_buffer.Offset(*cpu_dest_address),
465 .size = amount, 498 .size = amount,
466 }}; 499 }};
467 500
468 auto mirror = [&](VAddr base_address, u64 size) { 501 boost::container::small_vector<IntervalType, 4> tmp_intervals;
469 VAddr diff = base_address - *cpu_src_address; 502 auto mirror = [&](VAddr base_address, VAddr base_address_end) {
470 VAddr new_base_address = *cpu_dest_address + diff; 503 const u64 size = base_address_end - base_address;
504 const VAddr diff = base_address - *cpu_src_address;
505 const VAddr new_base_address = *cpu_dest_address + diff;
471 const IntervalType add_interval{new_base_address, new_base_address + size}; 506 const IntervalType add_interval{new_base_address, new_base_address + size};
472 common_ranges.add(add_interval); 507 uncommitted_ranges.add(add_interval);
508 tmp_intervals.push_back(add_interval);
473 }; 509 };
474 510 ForEachWrittenRange(*cpu_src_address, amount, mirror);
475 const VAddr start_address = *cpu_src_address; 511 // This subtraction in this order is important for overlapping copies.
476 const VAddr end_address = start_address + amount; 512 common_ranges.subtract(subtract_interval);
477 const IntervalType search_interval{start_address - amount, 1}; 513 for (const IntervalType add_interval : tmp_intervals) {
478 auto it = common_ranges.lower_bound(search_interval); 514 common_ranges.add(add_interval);
479 if (it == common_ranges.end()) {
480 it = common_ranges.begin();
481 }
482 while (it != common_ranges.end()) {
483 VAddr inter_addr_end = it->upper();
484 VAddr inter_addr = it->lower();
485 if (inter_addr >= end_address) {
486 break;
487 }
488 if (inter_addr_end <= start_address) {
489 it++;
490 continue;
491 }
492 if (inter_addr_end > end_address) {
493 inter_addr_end = end_address;
494 }
495 if (inter_addr < start_address) {
496 inter_addr = start_address;
497 }
498 mirror(inter_addr, inter_addr_end - inter_addr);
499 it++;
500 } 515 }
501 516
502 runtime.CopyBuffer(dest_buffer, src_buffer, copies); 517 runtime.CopyBuffer(dest_buffer, src_buffer, copies);
@@ -695,30 +710,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
695 710
696 const VAddr start_address = buffer_addr + range_offset; 711 const VAddr start_address = buffer_addr + range_offset;
697 const VAddr end_address = start_address + range_size; 712 const VAddr end_address = start_address + range_size;
698 const IntervalType search_interval{cpu_addr, 1}; 713 ForEachWrittenRange(start_address, range_size, add_download);
699 auto it = common_ranges.lower_bound(search_interval);
700 if (it == common_ranges.end()) {
701 it = common_ranges.begin();
702 }
703 while (it != common_ranges.end()) {
704 VAddr inter_addr_end = it->upper();
705 VAddr inter_addr = it->lower();
706 if (inter_addr >= end_address) {
707 break;
708 }
709 if (inter_addr_end <= start_address) {
710 it++;
711 continue;
712 }
713 if (inter_addr_end > end_address) {
714 inter_addr_end = end_address;
715 }
716 if (inter_addr < start_address) {
717 inter_addr = start_address;
718 }
719 add_download(inter_addr, inter_addr_end);
720 it++;
721 }
722 const IntervalType subtract_interval{start_address, end_address}; 714 const IntervalType subtract_interval{start_address, end_address};
723 common_ranges.subtract(subtract_interval); 715 common_ranges.subtract(subtract_interval);
724 }); 716 });
@@ -816,7 +808,9 @@ void BufferCache<P>::BindHostIndexBuffer() {
816 const u32 size = index_buffer.size; 808 const u32 size = index_buffer.size;
817 SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); 809 SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
818 if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { 810 if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
819 runtime.BindIndexBuffer(buffer, offset, size); 811 const u32 new_offset = offset + maxwell3d.regs.index_array.first *
812 maxwell3d.regs.index_array.FormatSizeInBytes();
813 runtime.BindIndexBuffer(buffer, new_offset, size);
820 } else { 814 } else {
821 runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format, 815 runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
822 maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count, 816 maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
@@ -1429,30 +1423,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1429 1423
1430 const VAddr start_address = buffer_addr + range_offset; 1424 const VAddr start_address = buffer_addr + range_offset;
1431 const VAddr end_address = start_address + range_size; 1425 const VAddr end_address = start_address + range_size;
1432 const IntervalType search_interval{start_address - range_size, 1}; 1426 ForEachWrittenRange(start_address, range_size, add_download);
1433 auto it = common_ranges.lower_bound(search_interval);
1434 if (it == common_ranges.end()) {
1435 it = common_ranges.begin();
1436 }
1437 while (it != common_ranges.end()) {
1438 VAddr inter_addr_end = it->upper();
1439 VAddr inter_addr = it->lower();
1440 if (inter_addr >= end_address) {
1441 break;
1442 }
1443 if (inter_addr_end <= start_address) {
1444 it++;
1445 continue;
1446 }
1447 if (inter_addr_end > end_address) {
1448 inter_addr_end = end_address;
1449 }
1450 if (inter_addr < start_address) {
1451 inter_addr = start_address;
1452 }
1453 add_download(inter_addr, inter_addr_end);
1454 it++;
1455 }
1456 const IntervalType subtract_interval{start_address, end_address}; 1427 const IntervalType subtract_interval{start_address, end_address};
1457 common_ranges.subtract(subtract_interval); 1428 common_ranges.subtract(subtract_interval);
1458 }); 1429 });
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 8d4239afc..58014c1c3 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -10,13 +10,15 @@
10#include <stop_token> 10#include <stop_token>
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/fermi_2d.h" 12#include "video_core/engines/fermi_2d.h"
13#include "video_core/engines/maxwell_dma.h"
14#include "video_core/gpu.h" 13#include "video_core/gpu.h"
15#include "video_core/guest_driver.h" 14#include "video_core/guest_driver.h"
16 15
17namespace Tegra { 16namespace Tegra {
18class MemoryManager; 17class MemoryManager;
18namespace Engines {
19class AccelerateDMAInterface;
19} 20}
21} // namespace Tegra
20 22
21namespace VideoCore { 23namespace VideoCore {
22 24
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index cb562518d..3a78de258 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -13,6 +13,7 @@
13#include <boost/container/static_vector.hpp> 13#include <boost/container/static_vector.hpp>
14 14
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "video_core/engines/maxwell_dma.h"
16#include "video_core/rasterizer_accelerated.h" 17#include "video_core/rasterizer_accelerated.h"
17#include "video_core/rasterizer_interface.h" 18#include "video_core/rasterizer_interface.h"
18#include "video_core/renderer_vulkan/blit_image.h" 19#include "video_core/renderer_vulkan/blit_image.h"
@@ -51,12 +52,12 @@ class StateTracker;
51 52
52class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface { 53class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
53public: 54public:
54 explicit AccelerateDMA(BufferCache& buffer_cache); 55 explicit AccelerateDMA(BufferCache& buffer_cache);
55 56
56 bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override; 57 bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override;
57 58
58 private: 59private:
59 BufferCache& buffer_cache; 60 BufferCache& buffer_cache;
60}; 61};
61 62
62class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { 63class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {