summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: liamwhite, 2023-06-07 14:03:57 -0400
committer: GitHub, 2023-06-07 14:03:57 -0400
commit: cfb76d8f3ed8862bc341afeaf6d25a401e2976cf (patch)
tree: 30098242f24010db0da3cd6152a4a716d739b20d /src
parent: Merge pull request #10583 from ameerj/ill-logic (diff)
parent: gl_staging_buffers: Optimization to reduce fence waiting (diff)
download: yuzu-cfb76d8f3ed8862bc341afeaf6d25a401e2976cf.tar.gz
yuzu-cfb76d8f3ed8862bc341afeaf6d25a401e2976cf.tar.xz
yuzu-cfb76d8f3ed8862bc341afeaf6d25a401e2976cf.zip
Merge pull request #10476 from ameerj/gl-memory-maps
OpenGL: Make use of persistent buffer maps in buffer cache
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/CMakeLists.txt | 4
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 10
-rw-r--r-- src/video_core/buffer_cache/buffer_cache_base.h | 1
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 58
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h | 29
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 6
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h | 1
-rw-r--r-- src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp | 150
-rw-r--r-- src/video_core/renderer_opengl/gl_staging_buffer_pool.h (renamed from src/video_core/renderer_opengl/gl_stream_buffer.h) | 44
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.cpp | 63
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp | 87
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.h | 47
-rw-r--r-- src/video_core/renderer_opengl/util_shaders.cpp | 9
-rw-r--r-- src/video_core/renderer_opengl/util_shaders.h | 10
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.h | 1
15 files changed, 316 insertions, 204 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 94e3000ba..bf6439530 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -133,8 +133,8 @@ add_library(video_core STATIC
133 renderer_opengl/gl_shader_util.h 133 renderer_opengl/gl_shader_util.h
134 renderer_opengl/gl_state_tracker.cpp 134 renderer_opengl/gl_state_tracker.cpp
135 renderer_opengl/gl_state_tracker.h 135 renderer_opengl/gl_state_tracker.h
136 renderer_opengl/gl_stream_buffer.cpp 136 renderer_opengl/gl_staging_buffer_pool.cpp
137 renderer_opengl/gl_stream_buffer.h 137 renderer_opengl/gl_staging_buffer_pool.h
138 renderer_opengl/gl_texture_cache.cpp 138 renderer_opengl/gl_texture_cache.cpp
139 renderer_opengl/gl_texture_cache.h 139 renderer_opengl/gl_texture_cache.h
140 renderer_opengl/gl_texture_cache_base.cpp 140 renderer_opengl/gl_texture_cache_base.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f1ad5f7cb..2f281b370 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -478,7 +478,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
478 478
479 if (committed_ranges.empty()) { 479 if (committed_ranges.empty()) {
480 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 480 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
481
482 async_buffers.emplace_back(std::optional<Async_Buffer>{}); 481 async_buffers.emplace_back(std::optional<Async_Buffer>{});
483 } 482 }
484 return; 483 return;
@@ -539,7 +538,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
539 committed_ranges.clear(); 538 committed_ranges.clear();
540 if (downloads.empty()) { 539 if (downloads.empty()) {
541 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 540 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
542
543 async_buffers.emplace_back(std::optional<Async_Buffer>{}); 541 async_buffers.emplace_back(std::optional<Async_Buffer>{});
544 } 542 }
545 return; 543 return;
@@ -691,7 +689,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
691 const u32 size = channel_state->index_buffer.size; 689 const u32 size = channel_state->index_buffer.size;
692 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 690 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
693 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { 691 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
694 if constexpr (USE_MEMORY_MAPS) { 692 if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
695 auto upload_staging = runtime.UploadStagingBuffer(size); 693 auto upload_staging = runtime.UploadStagingBuffer(size);
696 std::array<BufferCopy, 1> copies{ 694 std::array<BufferCopy, 1> copies{
697 {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; 695 {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
@@ -1462,7 +1460,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr,
1462template <class P> 1460template <class P>
1463void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 1461void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
1464 std::span<BufferCopy> copies) { 1462 std::span<BufferCopy> copies) {
1465 if constexpr (USE_MEMORY_MAPS) { 1463 if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
1466 MappedUploadMemory(buffer, total_size_bytes, copies); 1464 MappedUploadMemory(buffer, total_size_bytes, copies);
1467 } else { 1465 } else {
1468 ImmediateUploadMemory(buffer, largest_copy, copies); 1466 ImmediateUploadMemory(buffer, largest_copy, copies);
@@ -1473,7 +1471,7 @@ template <class P>
1473void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, 1471void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
1474 [[maybe_unused]] u64 largest_copy, 1472 [[maybe_unused]] u64 largest_copy,
1475 [[maybe_unused]] std::span<const BufferCopy> copies) { 1473 [[maybe_unused]] std::span<const BufferCopy> copies) {
1476 if constexpr (!USE_MEMORY_MAPS) { 1474 if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) {
1477 std::span<u8> immediate_buffer; 1475 std::span<u8> immediate_buffer;
1478 for (const BufferCopy& copy : copies) { 1476 for (const BufferCopy& copy : copies) {
1479 std::span<const u8> upload_span; 1477 std::span<const u8> upload_span;
@@ -1532,7 +1530,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
1532 auto& buffer = slot_buffers[buffer_id]; 1530 auto& buffer = slot_buffers[buffer_id];
1533 SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); 1531 SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
1534 1532
1535 if constexpr (USE_MEMORY_MAPS) { 1533 if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
1536 auto upload_staging = runtime.UploadStagingBuffer(copy_size); 1534 auto upload_staging = runtime.UploadStagingBuffer(copy_size);
1537 std::array copies{BufferCopy{ 1535 std::array copies{BufferCopy{
1538 .src_offset = upload_staging.offset, 1536 .src_offset = upload_staging.offset,
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index c689fe06b..60a1f285e 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -173,6 +173,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
173 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; 173 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
174 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; 174 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
175 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; 175 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
176 static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
176 177
177 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; 178 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
178 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; 179 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 6d3bda192..c419714d4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
106 return views.back().texture.handle; 106 return views.back().texture.handle;
107} 107}
108 108
109BufferCacheRuntime::BufferCacheRuntime(const Device& device_) 109BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
110 : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, 110 StagingBufferPool& staging_buffer_pool_)
111 : device{device_}, staging_buffer_pool{staging_buffer_pool_},
112 has_fast_buffer_sub_data{device.HasFastBufferSubData()},
111 use_assembly_shaders{device.UseAssemblyShaders()}, 113 use_assembly_shaders{device.UseAssemblyShaders()},
112 has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, 114 has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
113 stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { 115 stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
@@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
140 }(); 142 }();
141} 143}
142 144
// Returns an upload staging map of `size` bytes by forwarding to the shared
// StagingBufferPool. The pool's result is returned directly, without a named
// temporary in between.
145StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) {
146 return staging_buffer_pool.RequestUploadBuffer(size);
147}
148
// Returns a download staging map of `size` bytes by forwarding to the shared
// StagingBufferPool. The pool's result is returned directly, without a named
// temporary in between.
149StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
150 return staging_buffer_pool.RequestDownloadBuffer(size);
151}
152
143u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { 153u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
144 if (device.CanReportMemoryUsage()) { 154 if (device.CanReportMemoryUsage()) {
145 return device_access_memory - device.GetCurrentDedicatedVideoMemory(); 155 return device_access_memory - device.GetCurrentDedicatedVideoMemory();
@@ -147,15 +157,49 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
147 return 2_GiB; 157 return 2_GiB;
148} 158}
149 159
// Side-by-side hunk: the left column is the previous Buffer&/Buffer& overload, the
// right column is the new raw-handle overload.
// New behavior: issues one glCopyNamedBufferSubData per entry in `copies`, reading
// from `src_buffer` and writing to `dst_buffer`. When `barrier` is true the whole
// batch is wrapped in a single PreCopyBarrier()/PostCopyBarrier() pair, so the
// barriers run once per call rather than once per copy (and also run when `copies`
// is empty).
150void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, 160void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
151 std::span<const VideoCommon::BufferCopy> copies) { 161 std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
162 if (barrier) {
163 PreCopyBarrier();
164 }
152 for (const VideoCommon::BufferCopy& copy : copies) { 165 for (const VideoCommon::BufferCopy& copy : copies) {
153 glCopyNamedBufferSubData( 166 glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset),
154 src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset), 167 static_cast<GLintptr>(copy.dst_offset),
155 static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size)); 168 static_cast<GLsizeiptr>(copy.size));
169 }
170 if (barrier) {
171 PostCopyBarrier();
156 } 172 }
157} 173}
158 174
// Convenience overload: the destination is a raw GL buffer name, the source is a
// cached Buffer. Unwraps the source handle and forwards to the handle/handle overload.
175void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
176 std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
177 CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier);
178}
179
// Convenience overload: the destination is a cached Buffer, the source is a raw GL
// buffer name. Unwraps the destination handle and forwards to the handle/handle overload.
180void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
181 std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
182 CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
183}
184
// Buffer-to-Buffer overload with the original two-buffer signature. It passes no
// `barrier` argument, so the handle/handle overload's declared default applies
// (`barrier = true` in gl_buffer_cache.h) and the copies are wrapped in barriers.
185void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
186 std::span<const VideoCommon::BufferCopy> copies) {
187 CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
188}
189
// Barrier issued before a batch of buffer copies. Uses GL_ALL_BARRIER_BITS, i.e. the
// broadest barrier glMemoryBarrier offers; the TODO below records that a narrower
// bit set has not been worked out yet.
190void BufferCacheRuntime::PreCopyBarrier() {
191 // TODO: finer grained barrier?
192 glMemoryBarrier(GL_ALL_BARRIER_BITS);
193}
194
// Barrier issued after a batch of buffer copies. Requests only the buffer-update and
// client-mapped-buffer bits.
// NOTE(review): presumably chosen so later buffer updates and reads through
// persistent mappings observe the copied data - confirm against the glMemoryBarrier
// reference.
195void BufferCacheRuntime::PostCopyBarrier() {
196 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
197}
198
// Calls glFinish(), which does not return until previously issued GL commands have
// completed.
199void BufferCacheRuntime::Finish() {
200 glFinish();
201}
202
159void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { 203void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
160 glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset), 204 glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset),
161 static_cast<GLsizeiptr>(size), GL_RED, GL_UNSIGNED_INT, &value); 205 static_cast<GLsizeiptr>(size), GL_RED, GL_UNSIGNED_INT, &value);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 18d3c3ac0..a24991585 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -12,7 +12,7 @@
12#include "video_core/rasterizer_interface.h" 12#include "video_core/rasterizer_interface.h"
13#include "video_core/renderer_opengl/gl_device.h" 13#include "video_core/renderer_opengl/gl_device.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_stream_buffer.h" 15#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
16 16
17namespace OpenGL { 17namespace OpenGL {
18 18
@@ -60,11 +60,28 @@ class BufferCacheRuntime {
60public: 60public:
61 static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); 61 static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
62 62
63 explicit BufferCacheRuntime(const Device& device_); 63 explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_);
64
65 [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);
66
67 [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
68
69 void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
70 std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
71
72 void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
73 std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
74
75 void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
76 std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
64 77
65 void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, 78 void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
66 std::span<const VideoCommon::BufferCopy> copies); 79 std::span<const VideoCommon::BufferCopy> copies);
67 80
81 void PreCopyBarrier();
82 void PostCopyBarrier();
83 void Finish();
84
68 void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); 85 void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
69 86
70 void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); 87 void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
@@ -169,6 +186,7 @@ private:
169 }; 186 };
170 187
171 const Device& device; 188 const Device& device;
189 StagingBufferPool& staging_buffer_pool;
172 190
173 bool has_fast_buffer_sub_data = false; 191 bool has_fast_buffer_sub_data = false;
174 bool use_assembly_shaders = false; 192 bool use_assembly_shaders = false;
@@ -201,7 +219,7 @@ private:
201struct BufferCacheParams { 219struct BufferCacheParams {
202 using Runtime = OpenGL::BufferCacheRuntime; 220 using Runtime = OpenGL::BufferCacheRuntime;
203 using Buffer = OpenGL::Buffer; 221 using Buffer = OpenGL::Buffer;
204 using Async_Buffer = u32; 222 using Async_Buffer = OpenGL::StagingBufferMap;
205 using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; 223 using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
206 224
207 static constexpr bool IS_OPENGL = true; 225 static constexpr bool IS_OPENGL = true;
@@ -209,9 +227,12 @@ struct BufferCacheParams {
209 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; 227 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
210 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; 228 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
211 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; 229 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
212 static constexpr bool USE_MEMORY_MAPS = false; 230 static constexpr bool USE_MEMORY_MAPS = true;
213 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; 231 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
214 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; 232 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
233
234 // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
235 static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
215}; 236};
216 237
217using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 238using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f5baa0f3c..fc711c44a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -24,6 +24,7 @@
24#include "video_core/renderer_opengl/gl_query_cache.h" 24#include "video_core/renderer_opengl/gl_query_cache.h"
25#include "video_core/renderer_opengl/gl_rasterizer.h" 25#include "video_core/renderer_opengl/gl_rasterizer.h"
26#include "video_core/renderer_opengl/gl_shader_cache.h" 26#include "video_core/renderer_opengl/gl_shader_cache.h"
27#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
27#include "video_core/renderer_opengl/gl_texture_cache.h" 28#include "video_core/renderer_opengl/gl_texture_cache.h"
28#include "video_core/renderer_opengl/maxwell_to_gl.h" 29#include "video_core/renderer_opengl/maxwell_to_gl.h"
29#include "video_core/renderer_opengl/renderer_opengl.h" 30#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
58 StateTracker& state_tracker_) 59 StateTracker& state_tracker_)
59 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), 60 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_),
60 program_manager(program_manager_), state_tracker(state_tracker_), 61 program_manager(program_manager_), state_tracker(state_tracker_),
61 texture_cache_runtime(device, program_manager, state_tracker), 62 texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
62 texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device), 63 texture_cache(texture_cache_runtime, *this),
64 buffer_cache_runtime(device, staging_buffer_pool),
63 buffer_cache(*this, cpu_memory_, buffer_cache_runtime), 65 buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
64 shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, 66 shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
65 state_tracker, gpu.ShaderNotify()), 67 state_tracker, gpu.ShaderNotify()),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 410d8ffc5..a73ad15c1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -230,6 +230,7 @@ private:
230 ProgramManager& program_manager; 230 ProgramManager& program_manager;
231 StateTracker& state_tracker; 231 StateTracker& state_tracker;
232 232
233 StagingBufferPool staging_buffer_pool;
233 TextureCacheRuntime texture_cache_runtime; 234 TextureCacheRuntime texture_cache_runtime;
234 TextureCache texture_cache; 235 TextureCache texture_cache;
235 BufferCacheRuntime buffer_cache_runtime; 236 BufferCacheRuntime buffer_cache_runtime;
diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
new file mode 100644
index 000000000..bbb06e51f
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
@@ -0,0 +1,150 @@
1// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <array>
5#include <memory>
6#include <span>
7
8#include <glad/glad.h>
9
10#include "common/alignment.h"
11#include "common/assert.h"
12#include "common/bit_util.h"
13#include "common/microprofile.h"
14#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
15
16MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192));
17
18namespace OpenGL {
19
20StagingBufferMap::~StagingBufferMap() {
21 if (sync) {
22 sync->Create();
23 }
24}
25
26StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
27 : storage_flags{storage_flags_}, map_flags{map_flags_} {}
28
29StagingBuffers::~StagingBuffers() = default;
30
31StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) {
32 MICROPROFILE_SCOPE(OpenGL_BufferRequest);
33
34 const size_t index = RequestBuffer(requested_size);
35 OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
36 sync_indices[index] = insert_fence ? ++current_sync_index : 0;
37 return StagingBufferMap{
38 .mapped_span = std::span(maps[index], requested_size),
39 .sync = sync,
40 .buffer = buffers[index].handle,
41 };
42}
43
44size_t StagingBuffers::RequestBuffer(size_t requested_size) {
45 if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
46 return *index;
47 }
48
49 OGLBuffer& buffer = buffers.emplace_back();
50 buffer.Create();
51 const auto next_pow2_size = Common::NextPow2(requested_size);
52 glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr,
53 storage_flags | GL_MAP_PERSISTENT_BIT);
54 maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size,
55 map_flags | GL_MAP_PERSISTENT_BIT)));
56 syncs.emplace_back();
57 sync_indices.emplace_back();
58 sizes.push_back(next_pow2_size);
59
60 ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
61 maps.size() == sizes.size());
62
63 return buffers.size() - 1;
64}
65
66std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
67 size_t known_unsignaled_index = current_sync_index + 1;
68 size_t smallest_buffer = std::numeric_limits<size_t>::max();
69 std::optional<size_t> found;
70 const size_t num_buffers = sizes.size();
71 for (size_t index = 0; index < num_buffers; ++index) {
72 const size_t buffer_size = sizes[index];
73 if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
74 continue;
75 }
76 if (syncs[index].handle != 0) {
77 if (sync_indices[index] >= known_unsignaled_index) {
78 // This fence is later than a fence that is known to not be signaled
79 continue;
80 }
81 if (!syncs[index].IsSignaled()) {
82 // Since this fence hasn't been signaled, it's safe to assume all later
83 // fences haven't been signaled either
84 known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]);
85 continue;
86 }
87 syncs[index].Release();
88 }
89 smallest_buffer = buffer_size;
90 found = index;
91 }
92 return found;
93}
94
95StreamBuffer::StreamBuffer() {
96 static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
97 buffer.Create();
98 glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
99 glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
100 mapped_pointer =
101 static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
102 for (OGLSync& sync : fences) {
103 sync.Create();
104 }
105}
106
// Reserves `size` bytes in the mapped stream buffer and returns the writable span
// together with its byte offset from the start of the buffer.
// `size` must be smaller than REGION_SIZE (asserted).
// NOTE(review): Region() is defined outside this view; it presumably converts a byte
// offset into an index into `fences` - confirm in the header.
107std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
108 ASSERT(size < REGION_SIZE);
// Create fences for the regions between the previous `used_iterator` and the current
// write position.
109 for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
110 ++region) {
111 fences[region].Create();
112 }
113 used_iterator = iterator;
114
// Wait on, then release, the fences of the regions past `free_iterator` that this
// request reaches into (bounded by NUM_SYNCS).
115 for (size_t region = Region(free_iterator) + 1,
116 region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
117 region < region_end; ++region) {
118 glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
119 fences[region].Release();
120 }
121 if (iterator + size >= free_iterator) {
122 free_iterator = iterator + size;
123 }
// Wrap-around: the request does not fit in the remaining space, so fence the tail
// regions, restart at offset 0 and wait for the regions the request will occupy.
124 if (iterator + size > STREAM_BUFFER_SIZE) {
125 for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
126 fences[region].Create();
127 }
128 used_iterator = 0;
129 iterator = 0;
130 free_iterator = size;
131
132 for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
133 glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
134 fences[region].Release();
135 }
136 }
// Hand out the current position, then advance it to the next MAX_ALIGNMENT boundary.
137 const size_t offset = iterator;
138 iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
139 return {std::span(mapped_pointer + offset, size), offset};
140}
141
142StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) {
143 return upload_buffers.RequestMap(size, true);
144}
145
146StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) {
147 return download_buffers.RequestMap(size, false);
148}
149
150} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
index 8fe927aaf..60f72d3a0 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
@@ -4,8 +4,10 @@
4#pragma once 4#pragma once
5 5
6#include <array> 6#include <array>
7#include <optional>
7#include <span> 8#include <span>
8#include <utility> 9#include <utility>
10#include <vector>
9 11
10#include <glad/glad.h> 12#include <glad/glad.h>
11 13
@@ -17,6 +19,35 @@ namespace OpenGL {
17 19
18using namespace Common::Literals; 20using namespace Common::Literals;
19 21
22struct StagingBufferMap {
23 ~StagingBufferMap();
24
25 std::span<u8> mapped_span;
26 size_t offset = 0;
27 OGLSync* sync;
28 GLuint buffer;
29};
30
// Growable set of persistently mapped staging buffers that share one pair of
// storage/map flag sets. Per-buffer state is kept in parallel vectors indexed by the
// value RequestBuffer()/FindBuffer() return.
31struct StagingBuffers {
32 explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
33 ~StagingBuffers();
34
// Returns a map over a buffer of at least `requested_size` bytes; `insert_fence`
// selects whether the map carries the buffer's sync object.
35 StagingBufferMap RequestMap(size_t requested_size, bool insert_fence);
36
// Returns the index of a reusable buffer, creating a new one when none is available.
37 size_t RequestBuffer(size_t requested_size);
38
// Returns the index of the smallest available buffer that fits, if any.
39 std::optional<size_t> FindBuffer(size_t requested_size);
40
// Parallel per-buffer arrays: sync object, buffer object, mapped pointer, capacity in
// bytes, and the sync serial assigned by the last RequestMap() for that buffer.
41 std::vector<OGLSync> syncs;
42 std::vector<OGLBuffer> buffers;
43 std::vector<u8*> maps;
44 std::vector<size_t> sizes;
45 std::vector<size_t> sync_indices;
// Flags applied (together with GL_MAP_PERSISTENT_BIT) when creating and mapping buffers.
46 GLenum storage_flags;
47 GLenum map_flags;
// Monotonic counter; incremented for every fenced RequestMap().
48 size_t current_sync_index = 0;
49};
50
20class StreamBuffer { 51class StreamBuffer {
21 static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB; 52 static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
22 static constexpr size_t NUM_SYNCS = 16; 53 static constexpr size_t NUM_SYNCS = 16;
@@ -48,4 +79,17 @@ private:
48 std::array<OGLSync, NUM_SYNCS> fences; 79 std::array<OGLSync, NUM_SYNCS> fences;
49}; 80};
50 81
82class StagingBufferPool {
83public:
84 StagingBufferPool() = default;
85 ~StagingBufferPool() = default;
86
87 StagingBufferMap RequestUploadBuffer(size_t size);
88 StagingBufferMap RequestDownloadBuffer(size_t size);
89
90private:
91 StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
92 StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
93};
94
51} // namespace OpenGL 95} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
deleted file mode 100644
index 2005c8993..000000000
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
1// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <array>
5#include <memory>
6#include <span>
7
8#include <glad/glad.h>
9
10#include "common/alignment.h"
11#include "common/assert.h"
12#include "video_core/renderer_opengl/gl_stream_buffer.h"
13
14namespace OpenGL {
15
16StreamBuffer::StreamBuffer() {
17 static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
18 buffer.Create();
19 glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
20 glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
21 mapped_pointer =
22 static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
23 for (OGLSync& sync : fences) {
24 sync.Create();
25 }
26}
27
28std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
29 ASSERT(size < REGION_SIZE);
30 for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
31 ++region) {
32 fences[region].Create();
33 }
34 used_iterator = iterator;
35
36 for (size_t region = Region(free_iterator) + 1,
37 region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
38 region < region_end; ++region) {
39 glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
40 fences[region].Release();
41 }
42 if (iterator + size >= free_iterator) {
43 free_iterator = iterator + size;
44 }
45 if (iterator + size > STREAM_BUFFER_SIZE) {
46 for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
47 fences[region].Create();
48 }
49 used_iterator = 0;
50 iterator = 0;
51 free_iterator = size;
52
53 for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
54 glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
55 fences[region].Release();
56 }
57 }
58 const size_t offset = iterator;
59 iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
60 return {std::span(mapped_pointer + offset, size), offset};
61}
62
63} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 56d0ff869..1c5dbcdd8 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -456,19 +456,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
456 return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; 456 return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
457 } 457 }
458} 458}
459
460} // Anonymous namespace 459} // Anonymous namespace
461 460
462ImageBufferMap::~ImageBufferMap() {
463 if (sync) {
464 sync->Create();
465 }
466}
467
468TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, 461TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
469 StateTracker& state_tracker_) 462 StateTracker& state_tracker_,
470 : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager), 463 StagingBufferPool& staging_buffer_pool_)
471 format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} { 464 : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_},
465 util_shaders(program_manager), format_conversion_pass{util_shaders},
466 resolution{Settings::values.resolution_info} {
472 static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; 467 static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
473 for (size_t i = 0; i < TARGETS.size(); ++i) { 468 for (size_t i = 0; i < TARGETS.size(); ++i) {
474 const GLenum target = TARGETS[i]; 469 const GLenum target = TARGETS[i];
@@ -558,12 +553,12 @@ void TextureCacheRuntime::Finish() {
558 glFinish(); 553 glFinish();
559} 554}
560 555
// Side-by-side hunk: the left column is the old body, which used the runtime's own
// upload_buffers; the right column is the new body, which forwards to the shared
// StagingBufferPool and returns a StagingBufferMap instead of an ImageBufferMap.
561ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { 556StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
562 return upload_buffers.RequestMap(size, true); 557 return staging_buffer_pool.RequestUploadBuffer(size);
563} 558}
564 559
// Side-by-side hunk: the left column is the old body, which used the runtime's own
// download_buffers; the right column is the new body, which forwards to the shared
// StagingBufferPool and returns a StagingBufferMap instead of an ImageBufferMap.
565ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { 560StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
566 return download_buffers.RequestMap(size, false); 561 return staging_buffer_pool.RequestDownloadBuffer(size);
567} 562}
568 563
569u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { 564u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
@@ -648,7 +643,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
648 is_linear ? GL_LINEAR : GL_NEAREST); 643 is_linear ? GL_LINEAR : GL_NEAREST);
649} 644}
650 645
651void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, 646void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map,
652 std::span<const SwizzleParameters> swizzles) { 647 std::span<const SwizzleParameters> swizzles) {
653 switch (image.info.type) { 648 switch (image.info.type) {
654 case ImageType::e2D: 649 case ImageType::e2D:
@@ -690,64 +685,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
690 return device.HasASTC(); 685 return device.HasASTC();
691} 686}
692 687
693TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
694 : storage_flags{storage_flags_}, map_flags{map_flags_} {}
695
696TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
697
698ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
699 bool insert_fence) {
700 const size_t index = RequestBuffer(requested_size);
701 OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
702 return ImageBufferMap{
703 .mapped_span = std::span(maps[index], requested_size),
704 .sync = sync,
705 .buffer = buffers[index].handle,
706 };
707}
708
709size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
710 if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
711 return *index;
712 }
713
714 OGLBuffer& buffer = buffers.emplace_back();
715 buffer.Create();
716 glNamedBufferStorage(buffer.handle, requested_size, nullptr,
717 storage_flags | GL_MAP_PERSISTENT_BIT);
718 maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
719 map_flags | GL_MAP_PERSISTENT_BIT)));
720
721 syncs.emplace_back();
722 sizes.push_back(requested_size);
723
724 ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
725 maps.size() == sizes.size());
726
727 return buffers.size() - 1;
728}
729
730std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
731 size_t smallest_buffer = std::numeric_limits<size_t>::max();
732 std::optional<size_t> found;
733 const size_t num_buffers = sizes.size();
734 for (size_t index = 0; index < num_buffers; ++index) {
735 const size_t buffer_size = sizes[index];
736 if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
737 continue;
738 }
739 if (syncs[index].handle != 0) {
740 if (!syncs[index].IsSignaled()) {
741 continue;
742 }
743 syncs[index].Release();
744 }
745 smallest_buffer = buffer_size;
746 found = index;
747 }
748 return found;
749}
750
751Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, 688Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
752 VAddr cpu_addr_) 689 VAddr cpu_addr_)
753 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { 690 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
@@ -823,7 +760,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset,
823 } 760 }
824} 761}
825 762
826void Image::UploadMemory(const ImageBufferMap& map, 763void Image::UploadMemory(const StagingBufferMap& map,
827 std::span<const VideoCommon::BufferImageCopy> copies) { 764 std::span<const VideoCommon::BufferImageCopy> copies) {
828 UploadMemory(map.buffer, map.offset, copies); 765 UploadMemory(map.buffer, map.offset, copies);
829} 766}
@@ -870,7 +807,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b
870 } 807 }
871} 808}
872 809
873void Image::DownloadMemory(ImageBufferMap& map, 810void Image::DownloadMemory(StagingBufferMap& map,
874 std::span<const VideoCommon::BufferImageCopy> copies) { 811 std::span<const VideoCommon::BufferImageCopy> copies) {
875 DownloadMemory(map.buffer, map.offset, copies); 812 DownloadMemory(map.buffer, map.offset, copies);
876} 813}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 3e9b3302b..1148b73d7 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -11,6 +11,7 @@
11#include "shader_recompiler/shader_info.h" 11#include "shader_recompiler/shader_info.h"
12#include "video_core/renderer_opengl/gl_device.h" 12#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
14#include "video_core/renderer_opengl/util_shaders.h" 15#include "video_core/renderer_opengl/util_shaders.h"
15#include "video_core/texture_cache/image_view_base.h" 16#include "video_core/texture_cache/image_view_base.h"
16#include "video_core/texture_cache/texture_cache_base.h" 17#include "video_core/texture_cache/texture_cache_base.h"
@@ -37,15 +38,6 @@ using VideoCommon::Region2D;
37using VideoCommon::RenderTargets; 38using VideoCommon::RenderTargets;
38using VideoCommon::SlotVector; 39using VideoCommon::SlotVector;
39 40
40struct ImageBufferMap {
41 ~ImageBufferMap();
42
43 std::span<u8> mapped_span;
44 size_t offset = 0;
45 OGLSync* sync;
46 GLuint buffer;
47};
48
49struct FormatProperties { 41struct FormatProperties {
50 GLenum compatibility_class; 42 GLenum compatibility_class;
51 bool compatibility_by_size; 43 bool compatibility_by_size;
@@ -74,14 +66,15 @@ class TextureCacheRuntime {
74 66
75public: 67public:
76 explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, 68 explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
77 StateTracker& state_tracker); 69 StateTracker& state_tracker,
70 StagingBufferPool& staging_buffer_pool);
78 ~TextureCacheRuntime(); 71 ~TextureCacheRuntime();
79 72
80 void Finish(); 73 void Finish();
81 74
82 ImageBufferMap UploadStagingBuffer(size_t size); 75 StagingBufferMap UploadStagingBuffer(size_t size);
83 76
84 ImageBufferMap DownloadStagingBuffer(size_t size); 77 StagingBufferMap DownloadStagingBuffer(size_t size);
85 78
86 u64 GetDeviceLocalMemory() const { 79 u64 GetDeviceLocalMemory() const {
87 return device_access_memory; 80 return device_access_memory;
@@ -120,7 +113,7 @@ public:
120 const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, 113 const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
121 Tegra::Engines::Fermi2D::Operation operation); 114 Tegra::Engines::Fermi2D::Operation operation);
122 115
123 void AccelerateImageUpload(Image& image, const ImageBufferMap& map, 116 void AccelerateImageUpload(Image& image, const StagingBufferMap& map,
124 std::span<const VideoCommon::SwizzleParameters> swizzles); 117 std::span<const VideoCommon::SwizzleParameters> swizzles);
125 118
126 void InsertUploadMemoryBarrier(); 119 void InsertUploadMemoryBarrier();
@@ -149,35 +142,16 @@ public:
149 } 142 }
150 143
151private: 144private:
152 struct StagingBuffers {
153 explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
154 ~StagingBuffers();
155
156 ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
157
158 size_t RequestBuffer(size_t requested_size);
159
160 std::optional<size_t> FindBuffer(size_t requested_size);
161
162 std::vector<OGLSync> syncs;
163 std::vector<OGLBuffer> buffers;
164 std::vector<u8*> maps;
165 std::vector<size_t> sizes;
166 GLenum storage_flags;
167 GLenum map_flags;
168 };
169
170 const Device& device; 145 const Device& device;
171 StateTracker& state_tracker; 146 StateTracker& state_tracker;
147 StagingBufferPool& staging_buffer_pool;
148
172 UtilShaders util_shaders; 149 UtilShaders util_shaders;
173 FormatConversionPass format_conversion_pass; 150 FormatConversionPass format_conversion_pass;
174 151
175 std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; 152 std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
176 bool has_broken_texture_view_formats = false; 153 bool has_broken_texture_view_formats = false;
177 154
178 StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
179 StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
180
181 OGLTexture null_image_1d_array; 155 OGLTexture null_image_1d_array;
182 OGLTexture null_image_cube_array; 156 OGLTexture null_image_cube_array;
183 OGLTexture null_image_3d; 157 OGLTexture null_image_3d;
@@ -213,7 +187,7 @@ public:
213 void UploadMemory(GLuint buffer_handle, size_t buffer_offset, 187 void UploadMemory(GLuint buffer_handle, size_t buffer_offset,
214 std::span<const VideoCommon::BufferImageCopy> copies); 188 std::span<const VideoCommon::BufferImageCopy> copies);
215 189
216 void UploadMemory(const ImageBufferMap& map, 190 void UploadMemory(const StagingBufferMap& map,
217 std::span<const VideoCommon::BufferImageCopy> copies); 191 std::span<const VideoCommon::BufferImageCopy> copies);
218 192
219 void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, 193 void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
@@ -222,7 +196,8 @@ public:
222 void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset, 196 void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
223 std::span<const VideoCommon::BufferImageCopy> copies); 197 std::span<const VideoCommon::BufferImageCopy> copies);
224 198
225 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); 199 void DownloadMemory(StagingBufferMap& map,
200 std::span<const VideoCommon::BufferImageCopy> copies);
226 201
227 GLuint StorageHandle() noexcept; 202 GLuint StorageHandle() noexcept;
228 203
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 2c7ac210b..544982d18 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -19,6 +19,7 @@
19#include "video_core/host_shaders/pitch_unswizzle_comp.h" 19#include "video_core/host_shaders/pitch_unswizzle_comp.h"
20#include "video_core/renderer_opengl/gl_shader_manager.h" 20#include "video_core/renderer_opengl/gl_shader_manager.h"
21#include "video_core/renderer_opengl/gl_shader_util.h" 21#include "video_core/renderer_opengl/gl_shader_util.h"
22#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
22#include "video_core/renderer_opengl/gl_texture_cache.h" 23#include "video_core/renderer_opengl/gl_texture_cache.h"
23#include "video_core/renderer_opengl/util_shaders.h" 24#include "video_core/renderer_opengl/util_shaders.h"
24#include "video_core/texture_cache/accelerated_swizzle.h" 25#include "video_core/texture_cache/accelerated_swizzle.h"
@@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
63 64
64UtilShaders::~UtilShaders() = default; 65UtilShaders::~UtilShaders() = default;
65 66
66void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, 67void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map,
67 std::span<const VideoCommon::SwizzleParameters> swizzles) { 68 std::span<const VideoCommon::SwizzleParameters> swizzles) {
68 static constexpr GLuint BINDING_INPUT_BUFFER = 0; 69 static constexpr GLuint BINDING_INPUT_BUFFER = 0;
69 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 70 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
@@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
111 program_manager.RestoreGuestCompute(); 112 program_manager.RestoreGuestCompute();
112} 113}
113 114
114void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, 115void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
115 std::span<const SwizzleParameters> swizzles) { 116 std::span<const SwizzleParameters> swizzles) {
116 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; 117 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
117 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; 118 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
148 program_manager.RestoreGuestCompute(); 149 program_manager.RestoreGuestCompute();
149} 150}
150 151
151void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, 152void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
152 std::span<const SwizzleParameters> swizzles) { 153 std::span<const SwizzleParameters> swizzles) {
153 static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; 154 static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
154 155
@@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
189 program_manager.RestoreGuestCompute(); 190 program_manager.RestoreGuestCompute();
190} 191}
191 192
192void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, 193void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map,
193 std::span<const SwizzleParameters> swizzles) { 194 std::span<const SwizzleParameters> swizzles) {
194 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; 195 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
195 static constexpr GLuint BINDING_INPUT_BUFFER = 0; 196 static constexpr GLuint BINDING_INPUT_BUFFER = 0;
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 9013808e7..feecd404c 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -16,23 +16,23 @@ namespace OpenGL {
16class Image; 16class Image;
17class ProgramManager; 17class ProgramManager;
18 18
19struct ImageBufferMap; 19struct StagingBufferMap;
20 20
21class UtilShaders { 21class UtilShaders {
22public: 22public:
23 explicit UtilShaders(ProgramManager& program_manager); 23 explicit UtilShaders(ProgramManager& program_manager);
24 ~UtilShaders(); 24 ~UtilShaders();
25 25
26 void ASTCDecode(Image& image, const ImageBufferMap& map, 26 void ASTCDecode(Image& image, const StagingBufferMap& map,
27 std::span<const VideoCommon::SwizzleParameters> swizzles); 27 std::span<const VideoCommon::SwizzleParameters> swizzles);
28 28
29 void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, 29 void BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
30 std::span<const VideoCommon::SwizzleParameters> swizzles); 30 std::span<const VideoCommon::SwizzleParameters> swizzles);
31 31
32 void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, 32 void BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
33 std::span<const VideoCommon::SwizzleParameters> swizzles); 33 std::span<const VideoCommon::SwizzleParameters> swizzles);
34 34
35 void PitchUpload(Image& image, const ImageBufferMap& map, 35 void PitchUpload(Image& image, const StagingBufferMap& map,
36 std::span<const VideoCommon::SwizzleParameters> swizzles); 36 std::span<const VideoCommon::SwizzleParameters> swizzles);
37 37
38 void CopyBC4(Image& dst_image, Image& src_image, 38 void CopyBC4(Image& dst_image, Image& src_image,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 794dd0758..92b4f7859 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -157,6 +157,7 @@ struct BufferCacheParams {
157 static constexpr bool USE_MEMORY_MAPS = true; 157 static constexpr bool USE_MEMORY_MAPS = true;
158 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; 158 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
159 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; 159 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
160 static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;
160}; 161};
161 162
162using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 163using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;