summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/buffer_cache.h299
-rw-r--r--src/video_core/buffer_cache/buffer_block.h77
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h449
-rw-r--r--src/video_core/buffer_cache/map_interval.h89
-rw-r--r--src/video_core/gpu.h4
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp51
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h39
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp2
9 files changed, 684 insertions, 330 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f315e021d..e2f85c5f1 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,5 +1,7 @@
1add_library(video_core STATIC 1add_library(video_core STATIC
2 buffer_cache.h 2 buffer_cache/buffer_block.h
3 buffer_cache/buffer_cache.h
4 buffer_cache/map_interval.h
3 dma_pusher.cpp 5 dma_pusher.cpp
4 dma_pusher.h 6 dma_pusher.h
5 debug_utils/debug_utils.cpp 7 debug_utils/debug_utils.cpp
diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h
deleted file mode 100644
index 6f868b8b4..000000000
--- a/src/video_core/buffer_cache.h
+++ /dev/null
@@ -1,299 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector>
14
15#include "common/alignment.h"
16#include "common/common_types.h"
17#include "core/core.h"
18#include "video_core/memory_manager.h"
19#include "video_core/rasterizer_cache.h"
20
21namespace VideoCore {
22class RasterizerInterface;
23}
24
25namespace VideoCommon {
26
27template <typename BufferStorageType>
28class CachedBuffer final : public RasterizerCacheObject {
29public:
30 explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr)
31 : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {}
32 ~CachedBuffer() override = default;
33
34 VAddr GetCpuAddr() const override {
35 return cpu_addr;
36 }
37
38 std::size_t GetSizeInBytes() const override {
39 return size;
40 }
41
42 u8* GetWritableHostPtr() const {
43 return host_ptr;
44 }
45
46 std::size_t GetSize() const {
47 return size;
48 }
49
50 std::size_t GetCapacity() const {
51 return capacity;
52 }
53
54 bool IsInternalized() const {
55 return is_internal;
56 }
57
58 const BufferStorageType& GetBuffer() const {
59 return buffer;
60 }
61
62 void SetSize(std::size_t new_size) {
63 size = new_size;
64 }
65
66 void SetInternalState(bool is_internal_) {
67 is_internal = is_internal_;
68 }
69
70 BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) {
71 capacity = new_capacity;
72 std::swap(buffer, buffer_);
73 return buffer_;
74 }
75
76private:
77 u8* host_ptr{};
78 VAddr cpu_addr{};
79 std::size_t size{};
80 std::size_t capacity{};
81 bool is_internal{};
82 BufferStorageType buffer;
83};
84
85template <typename BufferStorageType, typename BufferType, typename StreamBuffer>
86class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> {
87public:
88 using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>;
89 using BufferInfo = std::pair<const BufferType*, u64>;
90
91 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
92 std::unique_ptr<StreamBuffer> stream_buffer)
93 : RasterizerCache<Buffer>{rasterizer}, system{system},
94 stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{
95 this->stream_buffer->GetHandle()} {}
96 ~BufferCache() = default;
97
98 void Unregister(const Buffer& entry) override {
99 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
100 if (entry->IsInternalized()) {
101 internalized_entries.erase(entry->GetCacheAddr());
102 }
103 ReserveBuffer(entry);
104 RasterizerCache<Buffer>::Unregister(entry);
105 }
106
107 void TickFrame() {
108 marked_for_destruction_index =
109 (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size();
110 MarkedForDestruction().clear();
111 }
112
113 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
114 bool internalize = false, bool is_written = false) {
115 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
116
117 auto& memory_manager = system.GPU().MemoryManager();
118 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
119 if (!host_ptr) {
120 return {GetEmptyBuffer(size), 0};
121 }
122 const auto cache_addr = ToCacheAddr(host_ptr);
123
124 // Cache management is a big overhead, so only cache entries with a given size.
125 // TODO: Figure out which size is the best for given games.
126 constexpr std::size_t max_stream_size = 0x800;
127 if (!internalize && size < max_stream_size &&
128 internalized_entries.find(cache_addr) == internalized_entries.end()) {
129 return StreamBufferUpload(host_ptr, size, alignment);
130 }
131
132 auto entry = RasterizerCache<Buffer>::TryGet(cache_addr);
133 if (!entry) {
134 return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written);
135 }
136
137 if (entry->GetSize() < size) {
138 IncreaseBufferSize(entry, size);
139 }
140 if (is_written) {
141 entry->MarkAsModified(true, *this);
142 }
143 return {ToHandle(entry->GetBuffer()), 0};
144 }
145
146 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
147 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
148 std::size_t alignment = 4) {
149 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
150 return StreamBufferUpload(raw_pointer, size, alignment);
151 }
152
153 void Map(std::size_t max_size) {
154 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
155 buffer_offset = buffer_offset_base;
156 }
157
158 /// Finishes the upload stream, returns true on bindings invalidation.
159 bool Unmap() {
160 stream_buffer->Unmap(buffer_offset - buffer_offset_base);
161 return std::exchange(invalidated, false);
162 }
163
164 virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0;
165
166protected:
167 void FlushObjectInner(const Buffer& entry) override {
168 DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr());
169 }
170
171 virtual BufferStorageType CreateBuffer(std::size_t size) = 0;
172
173 virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0;
174
175 virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset,
176 std::size_t size, const u8* data) = 0;
177
178 virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset,
179 std::size_t size, u8* data) = 0;
180
181 virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst,
182 std::size_t src_offset, std::size_t dst_offset,
183 std::size_t size) = 0;
184
185private:
186 BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
187 std::size_t alignment) {
188 AlignBuffer(alignment);
189 const std::size_t uploaded_offset = buffer_offset;
190 std::memcpy(buffer_ptr, raw_pointer, size);
191
192 buffer_ptr += size;
193 buffer_offset += size;
194 return {&stream_buffer_handle, uploaded_offset};
195 }
196
197 BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size,
198 bool internalize, bool is_written) {
199 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
200 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
201 ASSERT(cpu_addr);
202
203 auto entry = GetUncachedBuffer(*cpu_addr, host_ptr);
204 entry->SetSize(size);
205 entry->SetInternalState(internalize);
206 RasterizerCache<Buffer>::Register(entry);
207
208 if (internalize) {
209 internalized_entries.emplace(ToCacheAddr(host_ptr));
210 }
211 if (is_written) {
212 entry->MarkAsModified(true, *this);
213 }
214
215 if (entry->GetCapacity() < size) {
216 MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size));
217 }
218
219 UploadBufferData(entry->GetBuffer(), 0, size, host_ptr);
220 return {ToHandle(entry->GetBuffer()), 0};
221 }
222
223 void IncreaseBufferSize(Buffer& entry, std::size_t new_size) {
224 const std::size_t old_size = entry->GetSize();
225 if (entry->GetCapacity() < new_size) {
226 const auto& old_buffer = entry->GetBuffer();
227 auto new_buffer = CreateBuffer(new_size);
228
229 // Copy bits from the old buffer to the new buffer.
230 CopyBufferData(old_buffer, new_buffer, 0, 0, old_size);
231 MarkedForDestruction().push_back(
232 entry->ExchangeBuffer(std::move(new_buffer), new_size));
233
234 // This buffer could have been used
235 invalidated = true;
236 }
237 // Upload the new bits.
238 const std::size_t size_diff = new_size - old_size;
239 UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size);
240
241 // Update entry's size in the object and in the cache.
242 Unregister(entry);
243
244 entry->SetSize(new_size);
245 RasterizerCache<Buffer>::Register(entry);
246 }
247
248 Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) {
249 if (auto entry = TryGetReservedBuffer(host_ptr)) {
250 return entry;
251 }
252 return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr);
253 }
254
255 Buffer TryGetReservedBuffer(u8* host_ptr) {
256 const auto it = buffer_reserve.find(ToCacheAddr(host_ptr));
257 if (it == buffer_reserve.end()) {
258 return {};
259 }
260 auto& reserve = it->second;
261 auto entry = reserve.back();
262 reserve.pop_back();
263 return entry;
264 }
265
266 void ReserveBuffer(Buffer entry) {
267 buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry));
268 }
269
270 void AlignBuffer(std::size_t alignment) {
271 // Align the offset, not the mapped pointer
272 const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
273 buffer_ptr += offset_aligned - buffer_offset;
274 buffer_offset = offset_aligned;
275 }
276
277 std::vector<BufferStorageType>& MarkedForDestruction() {
278 return marked_for_destruction_ring_buffer[marked_for_destruction_index];
279 }
280
281 Core::System& system;
282
283 std::unique_ptr<StreamBuffer> stream_buffer;
284 BufferType stream_buffer_handle{};
285
286 bool invalidated = false;
287
288 u8* buffer_ptr = nullptr;
289 u64 buffer_offset = 0;
290 u64 buffer_offset_base = 0;
291
292 std::size_t marked_for_destruction_index = 0;
293 std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer;
294
295 std::unordered_set<CacheAddr> internalized_entries;
296 std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve;
297};
298
299} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
new file mode 100644
index 000000000..d2124443f
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -0,0 +1,77 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_set>
8#include <utility>
9
10#include "common/alignment.h"
11#include "common/common_types.h"
12#include "video_core/gpu.h"
13
14namespace VideoCommon {
15
16class BufferBlock {
17public:
18 bool Overlaps(const CacheAddr start, const CacheAddr end) const {
19 return (cache_addr < end) && (cache_addr_end > start);
20 }
21
22 bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
23 return cache_addr <= other_start && other_end <= cache_addr_end;
24 }
25
26 u8* GetWritableHostPtr() const {
27 return FromCacheAddr(cache_addr);
28 }
29
30 u8* GetWritableHostPtr(std::size_t offset) const {
31 return FromCacheAddr(cache_addr + offset);
32 }
33
34 std::size_t GetOffset(const CacheAddr in_addr) {
35 return static_cast<std::size_t>(in_addr - cache_addr);
36 }
37
38 CacheAddr GetCacheAddr() const {
39 return cache_addr;
40 }
41
42 CacheAddr GetCacheAddrEnd() const {
43 return cache_addr_end;
44 }
45
46 void SetCacheAddr(const CacheAddr new_addr) {
47 cache_addr = new_addr;
48 cache_addr_end = new_addr + size;
49 }
50
51 std::size_t GetSize() const {
52 return size;
53 }
54
55 void SetEpoch(u64 new_epoch) {
56 epoch = new_epoch;
57 }
58
59 u64 GetEpoch() {
60 return epoch;
61 }
62
63protected:
64 explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
65 SetCacheAddr(cache_addr);
66 }
67 ~BufferBlock() = default;
68
69private:
70 CacheAddr cache_addr{};
71 CacheAddr cache_addr_end{};
72 u64 pages{};
73 std::size_t size{};
74 u64 epoch{};
75};
76
77} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
new file mode 100644
index 000000000..38ce16ed5
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -0,0 +1,449 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector>
14
15#include "common/alignment.h"
16#include "common/common_types.h"
17#include "core/core.h"
18#include "video_core/buffer_cache/buffer_block.h"
19#include "video_core/buffer_cache/map_interval.h"
20#include "video_core/memory_manager.h"
21
22namespace VideoCore {
23class RasterizerInterface;
24}
25
26namespace VideoCommon {
27
28using MapInterval = std::shared_ptr<MapIntervalBase>;
29
30template <typename TBuffer, typename TBufferType, typename StreamBuffer>
31class BufferCache {
32public:
33 using BufferInfo = std::pair<const TBufferType*, u64>;
34
35 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
36 bool is_written = false) {
37 std::lock_guard lock{mutex};
38
39 auto& memory_manager = system.GPU().MemoryManager();
40 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
41 if (!host_ptr) {
42 return {GetEmptyBuffer(size), 0};
43 }
44 const auto cache_addr = ToCacheAddr(host_ptr);
45
46 // Cache management is a big overhead, so only cache entries with a given size.
47 // TODO: Figure out which size is the best for given games.
48 constexpr std::size_t max_stream_size = 0x800;
49 if (size < max_stream_size) {
50 if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
51 return StreamBufferUpload(host_ptr, size, alignment);
52 }
53 }
54
55 auto block = GetBlock(cache_addr, size);
56 auto map = MapAddress(block, gpu_addr, cache_addr, size);
57 if (is_written) {
58 map->MarkAsModified(true, GetModifiedTicks());
59 if (!map->IsWritten()) {
60 map->MarkAsWritten(true);
61 MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
62 }
63 } else {
64 if (map->IsWritten()) {
65 WriteBarrier();
66 }
67 }
68
69 const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));
70
71 return {ToHandle(block), offset};
72 }
73
74 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
75 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
76 std::size_t alignment = 4) {
77 std::lock_guard lock{mutex};
78 return StreamBufferUpload(raw_pointer, size, alignment);
79 }
80
81 void Map(std::size_t max_size) {
82 std::lock_guard lock{mutex};
83
84 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
85 buffer_offset = buffer_offset_base;
86 }
87
88 /// Finishes the upload stream, returns true on bindings invalidation.
89 bool Unmap() {
90 std::lock_guard lock{mutex};
91
92 stream_buffer->Unmap(buffer_offset - buffer_offset_base);
93 return std::exchange(invalidated, false);
94 }
95
96 void TickFrame() {
97 ++epoch;
98 while (!pending_destruction.empty()) {
99 if (pending_destruction.front()->GetEpoch() + 1 > epoch) {
100 break;
101 }
102 pending_destruction.pop_front();
103 }
104 }
105
106 /// Write any cached resources overlapping the specified region back to memory
107 void FlushRegion(CacheAddr addr, std::size_t size) {
108 std::lock_guard lock{mutex};
109
110 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
111 std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) {
112 return a->GetModificationTick() < b->GetModificationTick();
113 });
114 for (auto& object : objects) {
115 if (object->IsModified() && object->IsRegistered()) {
116 FlushMap(object);
117 }
118 }
119 }
120
121 /// Mark the specified region as being invalidated
122 void InvalidateRegion(CacheAddr addr, u64 size) {
123 std::lock_guard lock{mutex};
124
125 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
126 for (auto& object : objects) {
127 if (object->IsRegistered()) {
128 Unregister(object);
129 }
130 }
131 }
132
133 virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0;
134
135protected:
136 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
137 std::unique_ptr<StreamBuffer> stream_buffer)
138 : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
139 stream_buffer_handle{this->stream_buffer->GetHandle()} {}
140
141 ~BufferCache() = default;
142
143 virtual const TBufferType* ToHandle(const TBuffer& storage) = 0;
144
145 virtual void WriteBarrier() = 0;
146
147 virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
148
149 virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
150 const u8* data) = 0;
151
152 virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
153 u8* data) = 0;
154
155 virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
156 std::size_t dst_offset, std::size_t size) = 0;
157
158 /// Register an object into the cache
159 void Register(const MapInterval& new_map, bool inherit_written = false) {
160 const CacheAddr cache_ptr = new_map->GetStart();
161 const std::optional<VAddr> cpu_addr =
162 system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
163 if (!cache_ptr || !cpu_addr) {
164 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
165 new_map->GetGpuAddress());
166 return;
167 }
168 const std::size_t size = new_map->GetEnd() - new_map->GetStart();
169 new_map->SetCpuAddress(*cpu_addr);
170 new_map->MarkAsRegistered(true);
171 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
172 mapped_addresses.insert({interval, new_map});
173 rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
174 if (inherit_written) {
175 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
176 new_map->MarkAsWritten(true);
177 }
178 }
179
180 /// Unregisters an object from the cache
181 void Unregister(MapInterval& map) {
182 const std::size_t size = map->GetEnd() - map->GetStart();
183 rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
184 map->MarkAsRegistered(false);
185 if (map->IsWritten()) {
186 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
187 }
188 const IntervalType delete_interval{map->GetStart(), map->GetEnd()};
189 mapped_addresses.erase(delete_interval);
190 }
191
192private:
193 MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
194 return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
195 }
196
197 MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
198 const CacheAddr cache_addr, const std::size_t size) {
199
200 std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
201 if (overlaps.empty()) {
202 const CacheAddr cache_addr_end = cache_addr + size;
203 MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
204 u8* host_ptr = FromCacheAddr(cache_addr);
205 UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
206 Register(new_map);
207 return new_map;
208 }
209
210 const CacheAddr cache_addr_end = cache_addr + size;
211 if (overlaps.size() == 1) {
212 MapInterval& current_map = overlaps[0];
213 if (current_map->IsInside(cache_addr, cache_addr_end)) {
214 return current_map;
215 }
216 }
217 CacheAddr new_start = cache_addr;
218 CacheAddr new_end = cache_addr_end;
219 bool write_inheritance = false;
220 bool modified_inheritance = false;
221 // Calculate new buffer parameters
222 for (auto& overlap : overlaps) {
223 new_start = std::min(overlap->GetStart(), new_start);
224 new_end = std::max(overlap->GetEnd(), new_end);
225 write_inheritance |= overlap->IsWritten();
226 modified_inheritance |= overlap->IsModified();
227 }
228 GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
229 for (auto& overlap : overlaps) {
230 Unregister(overlap);
231 }
232 UpdateBlock(block, new_start, new_end, overlaps);
233 MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
234 if (modified_inheritance) {
235 new_map->MarkAsModified(true, GetModifiedTicks());
236 }
237 Register(new_map, write_inheritance);
238 return new_map;
239 }
240
241 void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
242 std::vector<MapInterval>& overlaps) {
243 const IntervalType base_interval{start, end};
244 IntervalSet interval_set{};
245 interval_set.add(base_interval);
246 for (auto& overlap : overlaps) {
247 const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()};
248 interval_set.subtract(subtract);
249 }
250 for (auto& interval : interval_set) {
251 std::size_t size = interval.upper() - interval.lower();
252 if (size > 0) {
253 u8* host_ptr = FromCacheAddr(interval.lower());
254 UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
255 }
256 }
257 }
258
259 std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
260 if (size == 0) {
261 return {};
262 }
263
264 std::vector<MapInterval> objects{};
265 const IntervalType interval{addr, addr + size};
266 for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) {
267 objects.push_back(pair.second);
268 }
269
270 return objects;
271 }
272
273 /// Returns a ticks counter used for tracking when cached objects were last modified
274 u64 GetModifiedTicks() {
275 return ++modified_ticks;
276 }
277
278 void FlushMap(MapInterval map) {
279 std::size_t size = map->GetEnd() - map->GetStart();
280 TBuffer block = blocks[map->GetStart() >> block_page_bits];
281 u8* host_ptr = FromCacheAddr(map->GetStart());
282 DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
283 map->MarkAsModified(false, 0);
284 }
285
286 BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
287 std::size_t alignment) {
288 AlignBuffer(alignment);
289 const std::size_t uploaded_offset = buffer_offset;
290 std::memcpy(buffer_ptr, raw_pointer, size);
291
292 buffer_ptr += size;
293 buffer_offset += size;
294 return {&stream_buffer_handle, uploaded_offset};
295 }
296
297 void AlignBuffer(std::size_t alignment) {
298 // Align the offset, not the mapped pointer
299 const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
300 buffer_ptr += offset_aligned - buffer_offset;
301 buffer_offset = offset_aligned;
302 }
303
304 TBuffer EnlargeBlock(TBuffer buffer) {
305 const std::size_t old_size = buffer->GetSize();
306 const std::size_t new_size = old_size + block_page_size;
307 const CacheAddr cache_addr = buffer->GetCacheAddr();
308 TBuffer new_buffer = CreateBlock(cache_addr, new_size);
309 CopyBlock(buffer, new_buffer, 0, 0, old_size);
310 buffer->SetEpoch(epoch);
311 pending_destruction.push_back(buffer);
312 const CacheAddr cache_addr_end = cache_addr + new_size - 1;
313 u64 page_start = cache_addr >> block_page_bits;
314 const u64 page_end = cache_addr_end >> block_page_bits;
315 while (page_start <= page_end) {
316 blocks[page_start] = new_buffer;
317 ++page_start;
318 }
319 return new_buffer;
320 }
321
322 TBuffer MergeBlocks(TBuffer first, TBuffer second) {
323 const std::size_t size_1 = first->GetSize();
324 const std::size_t size_2 = second->GetSize();
325 const CacheAddr first_addr = first->GetCacheAddr();
326 const CacheAddr second_addr = second->GetCacheAddr();
327 const CacheAddr new_addr = std::min(first_addr, second_addr);
328 const std::size_t new_size = size_1 + size_2;
329 TBuffer new_buffer = CreateBlock(new_addr, new_size);
330 CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
331 CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
332 first->SetEpoch(epoch);
333 second->SetEpoch(epoch);
334 pending_destruction.push_back(first);
335 pending_destruction.push_back(second);
336 const CacheAddr cache_addr_end = new_addr + new_size - 1;
337 u64 page_start = new_addr >> block_page_bits;
338 const u64 page_end = cache_addr_end >> block_page_bits;
339 while (page_start <= page_end) {
340 blocks[page_start] = new_buffer;
341 ++page_start;
342 }
343 return new_buffer;
344 }
345
346 TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
347 TBuffer found{};
348 const CacheAddr cache_addr_end = cache_addr + size - 1;
349 u64 page_start = cache_addr >> block_page_bits;
350 const u64 page_end = cache_addr_end >> block_page_bits;
351 const u64 num_pages = page_end - page_start + 1;
352 while (page_start <= page_end) {
353 auto it = blocks.find(page_start);
354 if (it == blocks.end()) {
355 if (found) {
356 found = EnlargeBlock(found);
357 } else {
358 const CacheAddr start_addr = (page_start << block_page_bits);
359 found = CreateBlock(start_addr, block_page_size);
360 blocks[page_start] = found;
361 }
362 } else {
363 if (found) {
364 if (found == it->second) {
365 ++page_start;
366 continue;
367 }
368 found = MergeBlocks(found, it->second);
369 } else {
370 found = it->second;
371 }
372 }
373 ++page_start;
374 }
375 return found;
376 }
377
378 void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
379 u64 page_start = start >> write_page_bit;
380 const u64 page_end = end >> write_page_bit;
381 while (page_start <= page_end) {
382 auto it = written_pages.find(page_start);
383 if (it != written_pages.end()) {
384 it->second = it->second + 1;
385 } else {
386 written_pages[page_start] = 1;
387 }
388 page_start++;
389 }
390 }
391
392 void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
393 u64 page_start = start >> write_page_bit;
394 const u64 page_end = end >> write_page_bit;
395 while (page_start <= page_end) {
396 auto it = written_pages.find(page_start);
397 if (it != written_pages.end()) {
398 if (it->second > 1) {
399 it->second = it->second - 1;
400 } else {
401 written_pages.erase(it);
402 }
403 }
404 page_start++;
405 }
406 }
407
408 bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
409 u64 page_start = start >> write_page_bit;
410 const u64 page_end = end >> write_page_bit;
411 while (page_start <= page_end) {
412 if (written_pages.count(page_start) > 0) {
413 return true;
414 }
415 page_start++;
416 }
417 return false;
418 }
419
420 std::unique_ptr<StreamBuffer> stream_buffer;
421 TBufferType stream_buffer_handle{};
422
423 bool invalidated = false;
424
425 u8* buffer_ptr = nullptr;
426 u64 buffer_offset = 0;
427 u64 buffer_offset_base = 0;
428
429 using IntervalSet = boost::icl::interval_set<CacheAddr>;
430 using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
431 using IntervalType = typename IntervalCache::interval_type;
432 IntervalCache mapped_addresses{};
433
434 static constexpr u64 write_page_bit{11};
435 std::unordered_map<u64, u32> written_pages{};
436
437 static constexpr u64 block_page_bits{21};
438 static constexpr u64 block_page_size{1 << block_page_bits};
439 std::unordered_map<u64, TBuffer> blocks{};
440
441 std::list<TBuffer> pending_destruction{};
442 u64 epoch{};
443 u64 modified_ticks{};
444 VideoCore::RasterizerInterface& rasterizer;
445 Core::System& system;
446 std::recursive_mutex mutex;
447};
448
449} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
new file mode 100644
index 000000000..3a104d5cd
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -0,0 +1,89 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/gpu.h"
9
10namespace VideoCommon {
11
12class MapIntervalBase {
13public:
14 MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
15 : start{start}, end{end}, gpu_addr{gpu_addr} {}
16
17 void SetCpuAddress(VAddr new_cpu_addr) {
18 cpu_addr = new_cpu_addr;
19 }
20
21 VAddr GetCpuAddress() const {
22 return cpu_addr;
23 }
24
25 GPUVAddr GetGpuAddress() const {
26 return gpu_addr;
27 }
28
29 bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
30 return (start <= other_start && other_end <= end);
31 }
32
33 bool operator==(const MapIntervalBase& rhs) const {
34 return std::tie(start, end) == std::tie(rhs.start, rhs.end);
35 }
36
37 bool operator!=(const MapIntervalBase& rhs) const {
38 return !operator==(rhs);
39 }
40
41 void MarkAsRegistered(const bool registered) {
42 is_registered = registered;
43 }
44
45 bool IsRegistered() const {
46 return is_registered;
47 }
48
49 CacheAddr GetStart() const {
50 return start;
51 }
52
53 CacheAddr GetEnd() const {
54 return end;
55 }
56
57 void MarkAsModified(const bool is_modified_, const u64 tick) {
58 is_modified = is_modified_;
59 ticks = tick;
60 }
61
62 bool IsModified() const {
63 return is_modified;
64 }
65
66 u64 GetModificationTick() const {
67 return ticks;
68 }
69
70 void MarkAsWritten(const bool is_written_) {
71 is_written = is_written_;
72 }
73
74 bool IsWritten() const {
75 return is_written;
76 }
77
78private:
79 CacheAddr start;
80 CacheAddr end;
81 GPUVAddr gpu_addr;
82 VAddr cpu_addr{};
83 bool is_written{};
84 bool is_modified{};
85 bool is_registered{};
86 u64 ticks{};
87};
88
89} // namespace VideoCommon
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 11857ff99..0baf2177c 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -19,6 +19,10 @@ inline CacheAddr ToCacheAddr(const void* host_ptr) {
19 return reinterpret_cast<CacheAddr>(host_ptr); 19 return reinterpret_cast<CacheAddr>(host_ptr);
20} 20}
21 21
22inline u8* FromCacheAddr(CacheAddr cache_addr) {
23 return reinterpret_cast<u8*>(cache_addr);
24}
25
22namespace Core { 26namespace Core {
23class System; 27class System;
24} 28}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 2a9b523f5..0781e6595 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,28 +7,40 @@
7#include <glad/glad.h> 7#include <glad/glad.h>
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/microprofile.h"
10#include "video_core/renderer_opengl/gl_buffer_cache.h" 11#include "video_core/renderer_opengl/gl_buffer_cache.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h" 12#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
13 14
14namespace OpenGL { 15namespace OpenGL {
15 16
17MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
18
19CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
20 : VideoCommon::BufferBlock{cache_addr, size} {
21 gl_buffer.Create();
22 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
23}
24
25CachedBufferBlock::~CachedBufferBlock() = default;
26
16OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 27OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
17 std::size_t stream_size) 28 std::size_t stream_size)
18 : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{ 29 : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{
19 rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} 30 rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
20 31
21OGLBufferCache::~OGLBufferCache() = default; 32OGLBufferCache::~OGLBufferCache() = default;
22 33
23OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) { 34Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
24 OGLBuffer buffer; 35 return std::make_shared<CachedBufferBlock>(cache_addr, size);
25 buffer.Create(); 36}
26 glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); 37
27 return buffer; 38void OGLBufferCache::WriteBarrier() {
39 glMemoryBarrier(GL_ALL_BARRIER_BITS);
28} 40}
29 41
30const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) { 42const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) {
31 return &buffer.handle; 43 return buffer->GetHandle();
32} 44}
33 45
34const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { 46const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
@@ -36,23 +48,24 @@ const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
36 return &null_buffer; 48 return &null_buffer;
37} 49}
38 50
39void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, 51void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
40 const u8* data) { 52 const u8* data) {
41 glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), 53 glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
42 static_cast<GLsizeiptr>(size), data); 54 static_cast<GLsizeiptr>(size), data);
43} 55}
44 56
45void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, 57void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
46 std::size_t size, u8* data) { 58 u8* data) {
47 glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), 59 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
60 glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
48 static_cast<GLsizeiptr>(size), data); 61 static_cast<GLsizeiptr>(size), data);
49} 62}
50 63
51void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, 64void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
52 std::size_t src_offset, std::size_t dst_offset, 65 std::size_t dst_offset, std::size_t size) {
53 std::size_t size) { 66 glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(),
54 glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset), 67 static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset),
55 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); 68 static_cast<GLsizeiptr>(size));
56} 69}
57 70
58} // namespace OpenGL 71} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 8c8ac4038..022e7bfa9 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -7,7 +7,7 @@
7#include <memory> 7#include <memory>
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache.h" 10#include "video_core/buffer_cache/buffer_cache.h"
11#include "video_core/rasterizer_cache.h" 11#include "video_core/rasterizer_cache.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_stream_buffer.h" 13#include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -21,7 +21,24 @@ namespace OpenGL {
21class OGLStreamBuffer; 21class OGLStreamBuffer;
22class RasterizerOpenGL; 22class RasterizerOpenGL;
23 23
24class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> { 24class CachedBufferBlock;
25
26using Buffer = std::shared_ptr<CachedBufferBlock>;
27
28class CachedBufferBlock : public VideoCommon::BufferBlock {
29public:
30 explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size);
31 ~CachedBufferBlock();
32
33 const GLuint* GetHandle() const {
34 return &gl_buffer.handle;
35 }
36
37private:
38 OGLBuffer gl_buffer{};
39};
40
41class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> {
25public: 42public:
26 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 43 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
27 std::size_t stream_size); 44 std::size_t stream_size);
@@ -30,18 +47,20 @@ public:
30 const GLuint* GetEmptyBuffer(std::size_t) override; 47 const GLuint* GetEmptyBuffer(std::size_t) override;
31 48
32protected: 49protected:
33 OGLBuffer CreateBuffer(std::size_t size) override; 50 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
51
52 void WriteBarrier() override;
34 53
35 const GLuint* ToHandle(const OGLBuffer& buffer) override; 54 const GLuint* ToHandle(const Buffer& buffer) override;
36 55
37 void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, 56 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
38 const u8* data) override; 57 const u8* data) override;
39 58
40 void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, 59 void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
41 u8* data) override; 60 u8* data) override;
42 61
43 void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset, 62 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
44 std::size_t dst_offset, std::size_t size) override; 63 std::size_t dst_offset, std::size_t size) override;
45}; 64};
46 65
47} // namespace OpenGL 66} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 80cfda7e4..019583718 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -980,7 +980,7 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr
980 GPUVAddr gpu_addr, std::size_t size) { 980 GPUVAddr gpu_addr, std::size_t size) {
981 const auto alignment{device.GetShaderStorageBufferAlignment()}; 981 const auto alignment{device.GetShaderStorageBufferAlignment()};
982 const auto [ssbo, buffer_offset] = 982 const auto [ssbo, buffer_offset] =
983 buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten()); 983 buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
984 bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); 984 bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
985} 985}
986 986