diff options
| author | 2023-06-07 14:03:57 -0400 | |
|---|---|---|
| committer | 2023-06-07 14:03:57 -0400 | |
| commit | cfb76d8f3ed8862bc341afeaf6d25a401e2976cf (patch) | |
| tree | 30098242f24010db0da3cd6152a4a716d739b20d /src | |
| parent | Merge pull request #10583 from ameerj/ill-logic (diff) | |
| parent | gl_staging_buffers: Optimization to reduce fence waiting (diff) | |
| download | yuzu-cfb76d8f3ed8862bc341afeaf6d25a401e2976cf.tar.gz yuzu-cfb76d8f3ed8862bc341afeaf6d25a401e2976cf.tar.xz yuzu-cfb76d8f3ed8862bc341afeaf6d25a401e2976cf.zip | |
Merge pull request #10476 from ameerj/gl-memory-maps
OpenGL: Make use of persistent buffer maps in buffer cache
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 4 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 10 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache_base.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 58 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 29 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp | 150 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_staging_buffer_pool.h (renamed from src/video_core/renderer_opengl/gl_stream_buffer.h) | 44 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_stream_buffer.cpp | 63 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 87 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 47 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/util_shaders.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/util_shaders.h | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.h | 1 |
15 files changed, 316 insertions, 204 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 94e3000ba..bf6439530 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -133,8 +133,8 @@ add_library(video_core STATIC | |||
| 133 | renderer_opengl/gl_shader_util.h | 133 | renderer_opengl/gl_shader_util.h |
| 134 | renderer_opengl/gl_state_tracker.cpp | 134 | renderer_opengl/gl_state_tracker.cpp |
| 135 | renderer_opengl/gl_state_tracker.h | 135 | renderer_opengl/gl_state_tracker.h |
| 136 | renderer_opengl/gl_stream_buffer.cpp | 136 | renderer_opengl/gl_staging_buffer_pool.cpp |
| 137 | renderer_opengl/gl_stream_buffer.h | 137 | renderer_opengl/gl_staging_buffer_pool.h |
| 138 | renderer_opengl/gl_texture_cache.cpp | 138 | renderer_opengl/gl_texture_cache.cpp |
| 139 | renderer_opengl/gl_texture_cache.h | 139 | renderer_opengl/gl_texture_cache.h |
| 140 | renderer_opengl/gl_texture_cache_base.cpp | 140 | renderer_opengl/gl_texture_cache_base.cpp |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f1ad5f7cb..2f281b370 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -478,7 +478,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 478 | 478 | ||
| 479 | if (committed_ranges.empty()) { | 479 | if (committed_ranges.empty()) { |
| 480 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 480 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 481 | |||
| 482 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); | 481 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); |
| 483 | } | 482 | } |
| 484 | return; | 483 | return; |
| @@ -539,7 +538,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 539 | committed_ranges.clear(); | 538 | committed_ranges.clear(); |
| 540 | if (downloads.empty()) { | 539 | if (downloads.empty()) { |
| 541 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 540 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 542 | |||
| 543 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); | 541 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); |
| 544 | } | 542 | } |
| 545 | return; | 543 | return; |
| @@ -691,7 +689,7 @@ void BufferCache<P>::BindHostIndexBuffer() { | |||
| 691 | const u32 size = channel_state->index_buffer.size; | 689 | const u32 size = channel_state->index_buffer.size; |
| 692 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | 690 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
| 693 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { | 691 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { |
| 694 | if constexpr (USE_MEMORY_MAPS) { | 692 | if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { |
| 695 | auto upload_staging = runtime.UploadStagingBuffer(size); | 693 | auto upload_staging = runtime.UploadStagingBuffer(size); |
| 696 | std::array<BufferCopy, 1> copies{ | 694 | std::array<BufferCopy, 1> copies{ |
| 697 | {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; | 695 | {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; |
| @@ -1462,7 +1460,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, | |||
| 1462 | template <class P> | 1460 | template <class P> |
| 1463 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 1461 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 1464 | std::span<BufferCopy> copies) { | 1462 | std::span<BufferCopy> copies) { |
| 1465 | if constexpr (USE_MEMORY_MAPS) { | 1463 | if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { |
| 1466 | MappedUploadMemory(buffer, total_size_bytes, copies); | 1464 | MappedUploadMemory(buffer, total_size_bytes, copies); |
| 1467 | } else { | 1465 | } else { |
| 1468 | ImmediateUploadMemory(buffer, largest_copy, copies); | 1466 | ImmediateUploadMemory(buffer, largest_copy, copies); |
| @@ -1473,7 +1471,7 @@ template <class P> | |||
| 1473 | void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, | 1471 | void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, |
| 1474 | [[maybe_unused]] u64 largest_copy, | 1472 | [[maybe_unused]] u64 largest_copy, |
| 1475 | [[maybe_unused]] std::span<const BufferCopy> copies) { | 1473 | [[maybe_unused]] std::span<const BufferCopy> copies) { |
| 1476 | if constexpr (!USE_MEMORY_MAPS) { | 1474 | if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) { |
| 1477 | std::span<u8> immediate_buffer; | 1475 | std::span<u8> immediate_buffer; |
| 1478 | for (const BufferCopy& copy : copies) { | 1476 | for (const BufferCopy& copy : copies) { |
| 1479 | std::span<const u8> upload_span; | 1477 | std::span<const u8> upload_span; |
| @@ -1532,7 +1530,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||
| 1532 | auto& buffer = slot_buffers[buffer_id]; | 1530 | auto& buffer = slot_buffers[buffer_id]; |
| 1533 | SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); | 1531 | SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); |
| 1534 | 1532 | ||
| 1535 | if constexpr (USE_MEMORY_MAPS) { | 1533 | if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { |
| 1536 | auto upload_staging = runtime.UploadStagingBuffer(copy_size); | 1534 | auto upload_staging = runtime.UploadStagingBuffer(copy_size); |
| 1537 | std::array copies{BufferCopy{ | 1535 | std::array copies{BufferCopy{ |
| 1538 | .src_offset = upload_staging.offset, | 1536 | .src_offset = upload_staging.offset, |
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index c689fe06b..60a1f285e 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -173,6 +173,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | |||
| 173 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | 173 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; |
| 174 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; | 174 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; |
| 175 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; | 175 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; |
| 176 | static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS; | ||
| 176 | 177 | ||
| 177 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; | 178 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; |
| 178 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; | 179 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 6d3bda192..c419714d4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { | |||
| 106 | return views.back().texture.handle; | 106 | return views.back().texture.handle; |
| 107 | } | 107 | } |
| 108 | 108 | ||
| 109 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_) | 109 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, |
| 110 | : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, | 110 | StagingBufferPool& staging_buffer_pool_) |
| 111 | : device{device_}, staging_buffer_pool{staging_buffer_pool_}, | ||
| 112 | has_fast_buffer_sub_data{device.HasFastBufferSubData()}, | ||
| 111 | use_assembly_shaders{device.UseAssemblyShaders()}, | 113 | use_assembly_shaders{device.UseAssemblyShaders()}, |
| 112 | has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, | 114 | has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, |
| 113 | stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { | 115 | stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { |
| @@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_) | |||
| 140 | }(); | 142 | }(); |
| 141 | } | 143 | } |
| 142 | 144 | ||
| 145 | StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) { | ||
| 146 | return staging_buffer_pool.RequestUploadBuffer(size); | ||
| 147 | } | ||
| 148 | |||
| 149 | StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) { | ||
| 150 | return staging_buffer_pool.RequestDownloadBuffer(size); | ||
| 151 | } | ||
| 152 | |||
| 143 | u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { | 153 | u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { |
| 144 | if (device.CanReportMemoryUsage()) { | 154 | if (device.CanReportMemoryUsage()) { |
| 145 | return device_access_memory - device.GetCurrentDedicatedVideoMemory(); | 155 | return device_access_memory - device.GetCurrentDedicatedVideoMemory(); |
| @@ -147,15 +157,49 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { | |||
| 147 | return 2_GiB; | 157 | return 2_GiB; |
| 148 | } | 158 | } |
| 149 | 159 | ||
| 150 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | 160 | void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer, |
| 151 | std::span<const VideoCommon::BufferCopy> copies) { | 161 | std::span<const VideoCommon::BufferCopy> copies, bool barrier) { |
| 162 | if (barrier) { | ||
| 163 | PreCopyBarrier(); | ||
| 164 | } | ||
| 152 | for (const VideoCommon::BufferCopy& copy : copies) { | 165 | for (const VideoCommon::BufferCopy& copy : copies) { |
| 153 | glCopyNamedBufferSubData( | 166 | glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset), |
| 154 | src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset), | 167 | static_cast<GLintptr>(copy.dst_offset), |
| 155 | static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size)); | 168 | static_cast<GLsizeiptr>(copy.size)); |
| 169 | } | ||
| 170 | if (barrier) { | ||
| 171 | PostCopyBarrier(); | ||
| 156 | } | 172 | } |
| 157 | } | 173 | } |
| 158 | 174 | ||
| 175 | void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, | ||
| 176 | std::span<const VideoCommon::BufferCopy> copies, bool barrier) { | ||
| 177 | CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier); | ||
| 178 | } | ||
| 179 | |||
| 180 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, | ||
| 181 | std::span<const VideoCommon::BufferCopy> copies, bool barrier) { | ||
| 182 | CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); | ||
| 183 | } | ||
| 184 | |||
| 185 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | ||
| 186 | std::span<const VideoCommon::BufferCopy> copies) { | ||
| 187 | CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); | ||
| 188 | } | ||
| 189 | |||
| 190 | void BufferCacheRuntime::PreCopyBarrier() { | ||
| 191 | // TODO: finer grained barrier? | ||
| 192 | glMemoryBarrier(GL_ALL_BARRIER_BITS); | ||
| 193 | } | ||
| 194 | |||
| 195 | void BufferCacheRuntime::PostCopyBarrier() { | ||
| 196 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); | ||
| 197 | } | ||
| 198 | |||
| 199 | void BufferCacheRuntime::Finish() { | ||
| 200 | glFinish(); | ||
| 201 | } | ||
| 202 | |||
| 159 | void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { | 203 | void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { |
| 160 | glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset), | 204 | glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset), |
| 161 | static_cast<GLsizeiptr>(size), GL_RED, GL_UNSIGNED_INT, &value); | 205 | static_cast<GLsizeiptr>(size), GL_RED, GL_UNSIGNED_INT, &value); |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 18d3c3ac0..a24991585 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | #include "video_core/rasterizer_interface.h" | 12 | #include "video_core/rasterizer_interface.h" |
| 13 | #include "video_core/renderer_opengl/gl_device.h" | 13 | #include "video_core/renderer_opengl/gl_device.h" |
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 15 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 15 | #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" |
| 16 | 16 | ||
| 17 | namespace OpenGL { | 17 | namespace OpenGL { |
| 18 | 18 | ||
| @@ -60,11 +60,28 @@ class BufferCacheRuntime { | |||
| 60 | public: | 60 | public: |
| 61 | static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); | 61 | static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); |
| 62 | 62 | ||
| 63 | explicit BufferCacheRuntime(const Device& device_); | 63 | explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_); |
| 64 | |||
| 65 | [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size); | ||
| 66 | |||
| 67 | [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); | ||
| 68 | |||
| 69 | void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, | ||
| 70 | std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); | ||
| 71 | |||
| 72 | void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, | ||
| 73 | std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); | ||
| 74 | |||
| 75 | void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, | ||
| 76 | std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); | ||
| 64 | 77 | ||
| 65 | void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | 78 | void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, |
| 66 | std::span<const VideoCommon::BufferCopy> copies); | 79 | std::span<const VideoCommon::BufferCopy> copies); |
| 67 | 80 | ||
| 81 | void PreCopyBarrier(); | ||
| 82 | void PostCopyBarrier(); | ||
| 83 | void Finish(); | ||
| 84 | |||
| 68 | void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); | 85 | void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); |
| 69 | 86 | ||
| 70 | void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); | 87 | void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); |
| @@ -169,6 +186,7 @@ private: | |||
| 169 | }; | 186 | }; |
| 170 | 187 | ||
| 171 | const Device& device; | 188 | const Device& device; |
| 189 | StagingBufferPool& staging_buffer_pool; | ||
| 172 | 190 | ||
| 173 | bool has_fast_buffer_sub_data = false; | 191 | bool has_fast_buffer_sub_data = false; |
| 174 | bool use_assembly_shaders = false; | 192 | bool use_assembly_shaders = false; |
| @@ -201,7 +219,7 @@ private: | |||
| 201 | struct BufferCacheParams { | 219 | struct BufferCacheParams { |
| 202 | using Runtime = OpenGL::BufferCacheRuntime; | 220 | using Runtime = OpenGL::BufferCacheRuntime; |
| 203 | using Buffer = OpenGL::Buffer; | 221 | using Buffer = OpenGL::Buffer; |
| 204 | using Async_Buffer = u32; | 222 | using Async_Buffer = OpenGL::StagingBufferMap; |
| 205 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | 223 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; |
| 206 | 224 | ||
| 207 | static constexpr bool IS_OPENGL = true; | 225 | static constexpr bool IS_OPENGL = true; |
| @@ -209,9 +227,12 @@ struct BufferCacheParams { | |||
| 209 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; | 227 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; |
| 210 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; | 228 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; |
| 211 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | 229 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; |
| 212 | static constexpr bool USE_MEMORY_MAPS = false; | 230 | static constexpr bool USE_MEMORY_MAPS = true; |
| 213 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; | 231 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; |
| 214 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; | 232 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; |
| 233 | |||
| 234 | // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads | ||
| 235 | static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false; | ||
| 215 | }; | 236 | }; |
| 216 | 237 | ||
| 217 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 238 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f5baa0f3c..fc711c44a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "video_core/renderer_opengl/gl_query_cache.h" | 24 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 25 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 26 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 26 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" | ||
| 27 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 28 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 28 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 29 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 29 | #include "video_core/renderer_opengl/renderer_opengl.h" | 30 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| @@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 58 | StateTracker& state_tracker_) | 59 | StateTracker& state_tracker_) |
| 59 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), | 60 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), |
| 60 | program_manager(program_manager_), state_tracker(state_tracker_), | 61 | program_manager(program_manager_), state_tracker(state_tracker_), |
| 61 | texture_cache_runtime(device, program_manager, state_tracker), | 62 | texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), |
| 62 | texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device), | 63 | texture_cache(texture_cache_runtime, *this), |
| 64 | buffer_cache_runtime(device, staging_buffer_pool), | ||
| 63 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | 65 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), |
| 64 | shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, | 66 | shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, |
| 65 | state_tracker, gpu.ShaderNotify()), | 67 | state_tracker, gpu.ShaderNotify()), |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 410d8ffc5..a73ad15c1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -230,6 +230,7 @@ private: | |||
| 230 | ProgramManager& program_manager; | 230 | ProgramManager& program_manager; |
| 231 | StateTracker& state_tracker; | 231 | StateTracker& state_tracker; |
| 232 | 232 | ||
| 233 | StagingBufferPool staging_buffer_pool; | ||
| 233 | TextureCacheRuntime texture_cache_runtime; | 234 | TextureCacheRuntime texture_cache_runtime; |
| 234 | TextureCache texture_cache; | 235 | TextureCache texture_cache; |
| 235 | BufferCacheRuntime buffer_cache_runtime; | 236 | BufferCacheRuntime buffer_cache_runtime; |
diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp new file mode 100644 index 000000000..bbb06e51f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp | |||
| @@ -0,0 +1,150 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <array> | ||
| 5 | #include <memory> | ||
| 6 | #include <span> | ||
| 7 | |||
| 8 | #include <glad/glad.h> | ||
| 9 | |||
| 10 | #include "common/alignment.h" | ||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/bit_util.h" | ||
| 13 | #include "common/microprofile.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" | ||
| 15 | |||
| 16 | MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192)); | ||
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | StagingBufferMap::~StagingBufferMap() { | ||
| 21 | if (sync) { | ||
| 22 | sync->Create(); | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) | ||
| 27 | : storage_flags{storage_flags_}, map_flags{map_flags_} {} | ||
| 28 | |||
| 29 | StagingBuffers::~StagingBuffers() = default; | ||
| 30 | |||
| 31 | StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) { | ||
| 32 | MICROPROFILE_SCOPE(OpenGL_BufferRequest); | ||
| 33 | |||
| 34 | const size_t index = RequestBuffer(requested_size); | ||
| 35 | OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; | ||
| 36 | sync_indices[index] = insert_fence ? ++current_sync_index : 0; | ||
| 37 | return StagingBufferMap{ | ||
| 38 | .mapped_span = std::span(maps[index], requested_size), | ||
| 39 | .sync = sync, | ||
| 40 | .buffer = buffers[index].handle, | ||
| 41 | }; | ||
| 42 | } | ||
| 43 | |||
| 44 | size_t StagingBuffers::RequestBuffer(size_t requested_size) { | ||
| 45 | if (const std::optional<size_t> index = FindBuffer(requested_size); index) { | ||
| 46 | return *index; | ||
| 47 | } | ||
| 48 | |||
| 49 | OGLBuffer& buffer = buffers.emplace_back(); | ||
| 50 | buffer.Create(); | ||
| 51 | const auto next_pow2_size = Common::NextPow2(requested_size); | ||
| 52 | glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr, | ||
| 53 | storage_flags | GL_MAP_PERSISTENT_BIT); | ||
| 54 | maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size, | ||
| 55 | map_flags | GL_MAP_PERSISTENT_BIT))); | ||
| 56 | syncs.emplace_back(); | ||
| 57 | sync_indices.emplace_back(); | ||
| 58 | sizes.push_back(next_pow2_size); | ||
| 59 | |||
| 60 | ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && | ||
| 61 | maps.size() == sizes.size()); | ||
| 62 | |||
| 63 | return buffers.size() - 1; | ||
| 64 | } | ||
| 65 | |||
| 66 | std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) { | ||
| 67 | size_t known_unsignaled_index = current_sync_index + 1; | ||
| 68 | size_t smallest_buffer = std::numeric_limits<size_t>::max(); | ||
| 69 | std::optional<size_t> found; | ||
| 70 | const size_t num_buffers = sizes.size(); | ||
| 71 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 72 | const size_t buffer_size = sizes[index]; | ||
| 73 | if (buffer_size < requested_size || buffer_size >= smallest_buffer) { | ||
| 74 | continue; | ||
| 75 | } | ||
| 76 | if (syncs[index].handle != 0) { | ||
| 77 | if (sync_indices[index] >= known_unsignaled_index) { | ||
| 78 | // This fence is later than a fence that is known to not be signaled | ||
| 79 | continue; | ||
| 80 | } | ||
| 81 | if (!syncs[index].IsSignaled()) { | ||
| 82 | // Since this fence hasn't been signaled, it's safe to assume all later | ||
| 83 | // fences haven't been signaled either | ||
| 84 | known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]); | ||
| 85 | continue; | ||
| 86 | } | ||
| 87 | syncs[index].Release(); | ||
| 88 | } | ||
| 89 | smallest_buffer = buffer_size; | ||
| 90 | found = index; | ||
| 91 | } | ||
| 92 | return found; | ||
| 93 | } | ||
| 94 | |||
| 95 | StreamBuffer::StreamBuffer() { | ||
| 96 | static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; | ||
| 97 | buffer.Create(); | ||
| 98 | glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); | ||
| 99 | glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); | ||
| 100 | mapped_pointer = | ||
| 101 | static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); | ||
| 102 | for (OGLSync& sync : fences) { | ||
| 103 | sync.Create(); | ||
| 104 | } | ||
| 105 | } | ||
| 106 | |||
| 107 | std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { | ||
| 108 | ASSERT(size < REGION_SIZE); | ||
| 109 | for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; | ||
| 110 | ++region) { | ||
| 111 | fences[region].Create(); | ||
| 112 | } | ||
| 113 | used_iterator = iterator; | ||
| 114 | |||
| 115 | for (size_t region = Region(free_iterator) + 1, | ||
| 116 | region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); | ||
| 117 | region < region_end; ++region) { | ||
| 118 | glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); | ||
| 119 | fences[region].Release(); | ||
| 120 | } | ||
| 121 | if (iterator + size >= free_iterator) { | ||
| 122 | free_iterator = iterator + size; | ||
| 123 | } | ||
| 124 | if (iterator + size > STREAM_BUFFER_SIZE) { | ||
| 125 | for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { | ||
| 126 | fences[region].Create(); | ||
| 127 | } | ||
| 128 | used_iterator = 0; | ||
| 129 | iterator = 0; | ||
| 130 | free_iterator = size; | ||
| 131 | |||
| 132 | for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { | ||
| 133 | glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); | ||
| 134 | fences[region].Release(); | ||
| 135 | } | ||
| 136 | } | ||
| 137 | const size_t offset = iterator; | ||
| 138 | iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); | ||
| 139 | return {std::span(mapped_pointer + offset, size), offset}; | ||
| 140 | } | ||
| 141 | |||
| 142 | StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) { | ||
| 143 | return upload_buffers.RequestMap(size, true); | ||
| 144 | } | ||
| 145 | |||
| 146 | StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) { | ||
| 147 | return download_buffers.RequestMap(size, false); | ||
| 148 | } | ||
| 149 | |||
| 150 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h index 8fe927aaf..60f72d3a0 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h | |||
| @@ -4,8 +4,10 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <optional> | ||
| 7 | #include <span> | 8 | #include <span> |
| 8 | #include <utility> | 9 | #include <utility> |
| 10 | #include <vector> | ||
| 9 | 11 | ||
| 10 | #include <glad/glad.h> | 12 | #include <glad/glad.h> |
| 11 | 13 | ||
| @@ -17,6 +19,35 @@ namespace OpenGL { | |||
| 17 | 19 | ||
| 18 | using namespace Common::Literals; | 20 | using namespace Common::Literals; |
| 19 | 21 | ||
| 22 | struct StagingBufferMap { | ||
| 23 | ~StagingBufferMap(); | ||
| 24 | |||
| 25 | std::span<u8> mapped_span; | ||
| 26 | size_t offset = 0; | ||
| 27 | OGLSync* sync; | ||
| 28 | GLuint buffer; | ||
| 29 | }; | ||
| 30 | |||
| 31 | struct StagingBuffers { | ||
| 32 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); | ||
| 33 | ~StagingBuffers(); | ||
| 34 | |||
| 35 | StagingBufferMap RequestMap(size_t requested_size, bool insert_fence); | ||
| 36 | |||
| 37 | size_t RequestBuffer(size_t requested_size); | ||
| 38 | |||
| 39 | std::optional<size_t> FindBuffer(size_t requested_size); | ||
| 40 | |||
| 41 | std::vector<OGLSync> syncs; | ||
| 42 | std::vector<OGLBuffer> buffers; | ||
| 43 | std::vector<u8*> maps; | ||
| 44 | std::vector<size_t> sizes; | ||
| 45 | std::vector<size_t> sync_indices; | ||
| 46 | GLenum storage_flags; | ||
| 47 | GLenum map_flags; | ||
| 48 | size_t current_sync_index = 0; | ||
| 49 | }; | ||
| 50 | |||
| 20 | class StreamBuffer { | 51 | class StreamBuffer { |
| 21 | static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB; | 52 | static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB; |
| 22 | static constexpr size_t NUM_SYNCS = 16; | 53 | static constexpr size_t NUM_SYNCS = 16; |
| @@ -48,4 +79,17 @@ private: | |||
| 48 | std::array<OGLSync, NUM_SYNCS> fences; | 79 | std::array<OGLSync, NUM_SYNCS> fences; |
| 49 | }; | 80 | }; |
| 50 | 81 | ||
| 82 | class StagingBufferPool { | ||
| 83 | public: | ||
| 84 | StagingBufferPool() = default; | ||
| 85 | ~StagingBufferPool() = default; | ||
| 86 | |||
| 87 | StagingBufferMap RequestUploadBuffer(size_t size); | ||
| 88 | StagingBufferMap RequestDownloadBuffer(size_t size); | ||
| 89 | |||
| 90 | private: | ||
| 91 | StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; | ||
| 92 | StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; | ||
| 93 | }; | ||
| 94 | |||
| 51 | } // namespace OpenGL | 95 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp deleted file mode 100644 index 2005c8993..000000000 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ /dev/null | |||
| @@ -1,63 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <array> | ||
| 5 | #include <memory> | ||
| 6 | #include <span> | ||
| 7 | |||
| 8 | #include <glad/glad.h> | ||
| 9 | |||
| 10 | #include "common/alignment.h" | ||
| 11 | #include "common/assert.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||
| 13 | |||
| 14 | namespace OpenGL { | ||
| 15 | |||
| 16 | StreamBuffer::StreamBuffer() { | ||
| 17 | static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; | ||
| 18 | buffer.Create(); | ||
| 19 | glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); | ||
| 20 | glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); | ||
| 21 | mapped_pointer = | ||
| 22 | static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); | ||
| 23 | for (OGLSync& sync : fences) { | ||
| 24 | sync.Create(); | ||
| 25 | } | ||
| 26 | } | ||
| 27 | |||
| 28 | std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { | ||
| 29 | ASSERT(size < REGION_SIZE); | ||
| 30 | for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; | ||
| 31 | ++region) { | ||
| 32 | fences[region].Create(); | ||
| 33 | } | ||
| 34 | used_iterator = iterator; | ||
| 35 | |||
| 36 | for (size_t region = Region(free_iterator) + 1, | ||
| 37 | region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); | ||
| 38 | region < region_end; ++region) { | ||
| 39 | glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); | ||
| 40 | fences[region].Release(); | ||
| 41 | } | ||
| 42 | if (iterator + size >= free_iterator) { | ||
| 43 | free_iterator = iterator + size; | ||
| 44 | } | ||
| 45 | if (iterator + size > STREAM_BUFFER_SIZE) { | ||
| 46 | for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { | ||
| 47 | fences[region].Create(); | ||
| 48 | } | ||
| 49 | used_iterator = 0; | ||
| 50 | iterator = 0; | ||
| 51 | free_iterator = size; | ||
| 52 | |||
| 53 | for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { | ||
| 54 | glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); | ||
| 55 | fences[region].Release(); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | const size_t offset = iterator; | ||
| 59 | iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); | ||
| 60 | return {std::span(mapped_pointer + offset, size), offset}; | ||
| 61 | } | ||
| 62 | |||
| 63 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 56d0ff869..1c5dbcdd8 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -456,19 +456,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form | |||
| 456 | return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; | 456 | return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; |
| 457 | } | 457 | } |
| 458 | } | 458 | } |
| 459 | |||
| 460 | } // Anonymous namespace | 459 | } // Anonymous namespace |
| 461 | 460 | ||
| 462 | ImageBufferMap::~ImageBufferMap() { | ||
| 463 | if (sync) { | ||
| 464 | sync->Create(); | ||
| 465 | } | ||
| 466 | } | ||
| 467 | |||
| 468 | TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, | 461 | TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, |
| 469 | StateTracker& state_tracker_) | 462 | StateTracker& state_tracker_, |
| 470 | : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager), | 463 | StagingBufferPool& staging_buffer_pool_) |
| 471 | format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} { | 464 | : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_}, |
| 465 | util_shaders(program_manager), format_conversion_pass{util_shaders}, | ||
| 466 | resolution{Settings::values.resolution_info} { | ||
| 472 | static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; | 467 | static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; |
| 473 | for (size_t i = 0; i < TARGETS.size(); ++i) { | 468 | for (size_t i = 0; i < TARGETS.size(); ++i) { |
| 474 | const GLenum target = TARGETS[i]; | 469 | const GLenum target = TARGETS[i]; |
| @@ -558,12 +553,12 @@ void TextureCacheRuntime::Finish() { | |||
| 558 | glFinish(); | 553 | glFinish(); |
| 559 | } | 554 | } |
| 560 | 555 | ||
| 561 | ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { | 556 | StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { |
| 562 | return upload_buffers.RequestMap(size, true); | 557 | return staging_buffer_pool.RequestUploadBuffer(size); |
| 563 | } | 558 | } |
| 564 | 559 | ||
| 565 | ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { | 560 | StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { |
| 566 | return download_buffers.RequestMap(size, false); | 561 | return staging_buffer_pool.RequestDownloadBuffer(size); |
| 567 | } | 562 | } |
| 568 | 563 | ||
| 569 | u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { | 564 | u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { |
| @@ -648,7 +643,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, | |||
| 648 | is_linear ? GL_LINEAR : GL_NEAREST); | 643 | is_linear ? GL_LINEAR : GL_NEAREST); |
| 649 | } | 644 | } |
| 650 | 645 | ||
| 651 | void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, | 646 | void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map, |
| 652 | std::span<const SwizzleParameters> swizzles) { | 647 | std::span<const SwizzleParameters> swizzles) { |
| 653 | switch (image.info.type) { | 648 | switch (image.info.type) { |
| 654 | case ImageType::e2D: | 649 | case ImageType::e2D: |
| @@ -690,64 +685,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept { | |||
| 690 | return device.HasASTC(); | 685 | return device.HasASTC(); |
| 691 | } | 686 | } |
| 692 | 687 | ||
| 693 | TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) | ||
| 694 | : storage_flags{storage_flags_}, map_flags{map_flags_} {} | ||
| 695 | |||
| 696 | TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; | ||
| 697 | |||
| 698 | ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, | ||
| 699 | bool insert_fence) { | ||
| 700 | const size_t index = RequestBuffer(requested_size); | ||
| 701 | OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; | ||
| 702 | return ImageBufferMap{ | ||
| 703 | .mapped_span = std::span(maps[index], requested_size), | ||
| 704 | .sync = sync, | ||
| 705 | .buffer = buffers[index].handle, | ||
| 706 | }; | ||
| 707 | } | ||
| 708 | |||
| 709 | size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { | ||
| 710 | if (const std::optional<size_t> index = FindBuffer(requested_size); index) { | ||
| 711 | return *index; | ||
| 712 | } | ||
| 713 | |||
| 714 | OGLBuffer& buffer = buffers.emplace_back(); | ||
| 715 | buffer.Create(); | ||
| 716 | glNamedBufferStorage(buffer.handle, requested_size, nullptr, | ||
| 717 | storage_flags | GL_MAP_PERSISTENT_BIT); | ||
| 718 | maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size, | ||
| 719 | map_flags | GL_MAP_PERSISTENT_BIT))); | ||
| 720 | |||
| 721 | syncs.emplace_back(); | ||
| 722 | sizes.push_back(requested_size); | ||
| 723 | |||
| 724 | ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && | ||
| 725 | maps.size() == sizes.size()); | ||
| 726 | |||
| 727 | return buffers.size() - 1; | ||
| 728 | } | ||
| 729 | |||
| 730 | std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { | ||
| 731 | size_t smallest_buffer = std::numeric_limits<size_t>::max(); | ||
| 732 | std::optional<size_t> found; | ||
| 733 | const size_t num_buffers = sizes.size(); | ||
| 734 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 735 | const size_t buffer_size = sizes[index]; | ||
| 736 | if (buffer_size < requested_size || buffer_size >= smallest_buffer) { | ||
| 737 | continue; | ||
| 738 | } | ||
| 739 | if (syncs[index].handle != 0) { | ||
| 740 | if (!syncs[index].IsSignaled()) { | ||
| 741 | continue; | ||
| 742 | } | ||
| 743 | syncs[index].Release(); | ||
| 744 | } | ||
| 745 | smallest_buffer = buffer_size; | ||
| 746 | found = index; | ||
| 747 | } | ||
| 748 | return found; | ||
| 749 | } | ||
| 750 | |||
| 751 | Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, | 688 | Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, |
| 752 | VAddr cpu_addr_) | 689 | VAddr cpu_addr_) |
| 753 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { | 690 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { |
| @@ -823,7 +760,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset, | |||
| 823 | } | 760 | } |
| 824 | } | 761 | } |
| 825 | 762 | ||
| 826 | void Image::UploadMemory(const ImageBufferMap& map, | 763 | void Image::UploadMemory(const StagingBufferMap& map, |
| 827 | std::span<const VideoCommon::BufferImageCopy> copies) { | 764 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 828 | UploadMemory(map.buffer, map.offset, copies); | 765 | UploadMemory(map.buffer, map.offset, copies); |
| 829 | } | 766 | } |
| @@ -870,7 +807,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b | |||
| 870 | } | 807 | } |
| 871 | } | 808 | } |
| 872 | 809 | ||
| 873 | void Image::DownloadMemory(ImageBufferMap& map, | 810 | void Image::DownloadMemory(StagingBufferMap& map, |
| 874 | std::span<const VideoCommon::BufferImageCopy> copies) { | 811 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 875 | DownloadMemory(map.buffer, map.offset, copies); | 812 | DownloadMemory(map.buffer, map.offset, copies); |
| 876 | } | 813 | } |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3e9b3302b..1148b73d7 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "shader_recompiler/shader_info.h" | 11 | #include "shader_recompiler/shader_info.h" |
| 12 | #include "video_core/renderer_opengl/gl_device.h" | 12 | #include "video_core/renderer_opengl/gl_device.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" | ||
| 14 | #include "video_core/renderer_opengl/util_shaders.h" | 15 | #include "video_core/renderer_opengl/util_shaders.h" |
| 15 | #include "video_core/texture_cache/image_view_base.h" | 16 | #include "video_core/texture_cache/image_view_base.h" |
| 16 | #include "video_core/texture_cache/texture_cache_base.h" | 17 | #include "video_core/texture_cache/texture_cache_base.h" |
| @@ -37,15 +38,6 @@ using VideoCommon::Region2D; | |||
| 37 | using VideoCommon::RenderTargets; | 38 | using VideoCommon::RenderTargets; |
| 38 | using VideoCommon::SlotVector; | 39 | using VideoCommon::SlotVector; |
| 39 | 40 | ||
| 40 | struct ImageBufferMap { | ||
| 41 | ~ImageBufferMap(); | ||
| 42 | |||
| 43 | std::span<u8> mapped_span; | ||
| 44 | size_t offset = 0; | ||
| 45 | OGLSync* sync; | ||
| 46 | GLuint buffer; | ||
| 47 | }; | ||
| 48 | |||
| 49 | struct FormatProperties { | 41 | struct FormatProperties { |
| 50 | GLenum compatibility_class; | 42 | GLenum compatibility_class; |
| 51 | bool compatibility_by_size; | 43 | bool compatibility_by_size; |
| @@ -74,14 +66,15 @@ class TextureCacheRuntime { | |||
| 74 | 66 | ||
| 75 | public: | 67 | public: |
| 76 | explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, | 68 | explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, |
| 77 | StateTracker& state_tracker); | 69 | StateTracker& state_tracker, |
| 70 | StagingBufferPool& staging_buffer_pool); | ||
| 78 | ~TextureCacheRuntime(); | 71 | ~TextureCacheRuntime(); |
| 79 | 72 | ||
| 80 | void Finish(); | 73 | void Finish(); |
| 81 | 74 | ||
| 82 | ImageBufferMap UploadStagingBuffer(size_t size); | 75 | StagingBufferMap UploadStagingBuffer(size_t size); |
| 83 | 76 | ||
| 84 | ImageBufferMap DownloadStagingBuffer(size_t size); | 77 | StagingBufferMap DownloadStagingBuffer(size_t size); |
| 85 | 78 | ||
| 86 | u64 GetDeviceLocalMemory() const { | 79 | u64 GetDeviceLocalMemory() const { |
| 87 | return device_access_memory; | 80 | return device_access_memory; |
| @@ -120,7 +113,7 @@ public: | |||
| 120 | const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, | 113 | const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, |
| 121 | Tegra::Engines::Fermi2D::Operation operation); | 114 | Tegra::Engines::Fermi2D::Operation operation); |
| 122 | 115 | ||
| 123 | void AccelerateImageUpload(Image& image, const ImageBufferMap& map, | 116 | void AccelerateImageUpload(Image& image, const StagingBufferMap& map, |
| 124 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 117 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 125 | 118 | ||
| 126 | void InsertUploadMemoryBarrier(); | 119 | void InsertUploadMemoryBarrier(); |
| @@ -149,35 +142,16 @@ public: | |||
| 149 | } | 142 | } |
| 150 | 143 | ||
| 151 | private: | 144 | private: |
| 152 | struct StagingBuffers { | ||
| 153 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); | ||
| 154 | ~StagingBuffers(); | ||
| 155 | |||
| 156 | ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); | ||
| 157 | |||
| 158 | size_t RequestBuffer(size_t requested_size); | ||
| 159 | |||
| 160 | std::optional<size_t> FindBuffer(size_t requested_size); | ||
| 161 | |||
| 162 | std::vector<OGLSync> syncs; | ||
| 163 | std::vector<OGLBuffer> buffers; | ||
| 164 | std::vector<u8*> maps; | ||
| 165 | std::vector<size_t> sizes; | ||
| 166 | GLenum storage_flags; | ||
| 167 | GLenum map_flags; | ||
| 168 | }; | ||
| 169 | |||
| 170 | const Device& device; | 145 | const Device& device; |
| 171 | StateTracker& state_tracker; | 146 | StateTracker& state_tracker; |
| 147 | StagingBufferPool& staging_buffer_pool; | ||
| 148 | |||
| 172 | UtilShaders util_shaders; | 149 | UtilShaders util_shaders; |
| 173 | FormatConversionPass format_conversion_pass; | 150 | FormatConversionPass format_conversion_pass; |
| 174 | 151 | ||
| 175 | std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; | 152 | std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; |
| 176 | bool has_broken_texture_view_formats = false; | 153 | bool has_broken_texture_view_formats = false; |
| 177 | 154 | ||
| 178 | StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; | ||
| 179 | StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; | ||
| 180 | |||
| 181 | OGLTexture null_image_1d_array; | 155 | OGLTexture null_image_1d_array; |
| 182 | OGLTexture null_image_cube_array; | 156 | OGLTexture null_image_cube_array; |
| 183 | OGLTexture null_image_3d; | 157 | OGLTexture null_image_3d; |
| @@ -213,7 +187,7 @@ public: | |||
| 213 | void UploadMemory(GLuint buffer_handle, size_t buffer_offset, | 187 | void UploadMemory(GLuint buffer_handle, size_t buffer_offset, |
| 214 | std::span<const VideoCommon::BufferImageCopy> copies); | 188 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 215 | 189 | ||
| 216 | void UploadMemory(const ImageBufferMap& map, | 190 | void UploadMemory(const StagingBufferMap& map, |
| 217 | std::span<const VideoCommon::BufferImageCopy> copies); | 191 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 218 | 192 | ||
| 219 | void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, | 193 | void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, |
| @@ -222,7 +196,8 @@ public: | |||
| 222 | void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset, | 196 | void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset, |
| 223 | std::span<const VideoCommon::BufferImageCopy> copies); | 197 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 224 | 198 | ||
| 225 | void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); | 199 | void DownloadMemory(StagingBufferMap& map, |
| 200 | std::span<const VideoCommon::BufferImageCopy> copies); | ||
| 226 | 201 | ||
| 227 | GLuint StorageHandle() noexcept; | 202 | GLuint StorageHandle() noexcept; |
| 228 | 203 | ||
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 2c7ac210b..544982d18 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "video_core/host_shaders/pitch_unswizzle_comp.h" | 19 | #include "video_core/host_shaders/pitch_unswizzle_comp.h" |
| 20 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 20 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 21 | #include "video_core/renderer_opengl/gl_shader_util.h" | 21 | #include "video_core/renderer_opengl/gl_shader_util.h" |
| 22 | #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 23 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 23 | #include "video_core/renderer_opengl/util_shaders.h" | 24 | #include "video_core/renderer_opengl/util_shaders.h" |
| 24 | #include "video_core/texture_cache/accelerated_swizzle.h" | 25 | #include "video_core/texture_cache/accelerated_swizzle.h" |
| @@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) | |||
| 63 | 64 | ||
| 64 | UtilShaders::~UtilShaders() = default; | 65 | UtilShaders::~UtilShaders() = default; |
| 65 | 66 | ||
| 66 | void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | 67 | void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map, |
| 67 | std::span<const VideoCommon::SwizzleParameters> swizzles) { | 68 | std::span<const VideoCommon::SwizzleParameters> swizzles) { |
| 68 | static constexpr GLuint BINDING_INPUT_BUFFER = 0; | 69 | static constexpr GLuint BINDING_INPUT_BUFFER = 0; |
| 69 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 70 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| @@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
| 111 | program_manager.RestoreGuestCompute(); | 112 | program_manager.RestoreGuestCompute(); |
| 112 | } | 113 | } |
| 113 | 114 | ||
| 114 | void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | 115 | void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map, |
| 115 | std::span<const SwizzleParameters> swizzles) { | 116 | std::span<const SwizzleParameters> swizzles) { |
| 116 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; | 117 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; |
| 117 | static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; | 118 | static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; |
| @@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | |||
| 148 | program_manager.RestoreGuestCompute(); | 149 | program_manager.RestoreGuestCompute(); |
| 149 | } | 150 | } |
| 150 | 151 | ||
| 151 | void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, | 152 | void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map, |
| 152 | std::span<const SwizzleParameters> swizzles) { | 153 | std::span<const SwizzleParameters> swizzles) { |
| 153 | static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; | 154 | static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; |
| 154 | 155 | ||
| @@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, | |||
| 189 | program_manager.RestoreGuestCompute(); | 190 | program_manager.RestoreGuestCompute(); |
| 190 | } | 191 | } |
| 191 | 192 | ||
| 192 | void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, | 193 | void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map, |
| 193 | std::span<const SwizzleParameters> swizzles) { | 194 | std::span<const SwizzleParameters> swizzles) { |
| 194 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; | 195 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; |
| 195 | static constexpr GLuint BINDING_INPUT_BUFFER = 0; | 196 | static constexpr GLuint BINDING_INPUT_BUFFER = 0; |
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 9013808e7..feecd404c 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h | |||
| @@ -16,23 +16,23 @@ namespace OpenGL { | |||
| 16 | class Image; | 16 | class Image; |
| 17 | class ProgramManager; | 17 | class ProgramManager; |
| 18 | 18 | ||
| 19 | struct ImageBufferMap; | 19 | struct StagingBufferMap; |
| 20 | 20 | ||
| 21 | class UtilShaders { | 21 | class UtilShaders { |
| 22 | public: | 22 | public: |
| 23 | explicit UtilShaders(ProgramManager& program_manager); | 23 | explicit UtilShaders(ProgramManager& program_manager); |
| 24 | ~UtilShaders(); | 24 | ~UtilShaders(); |
| 25 | 25 | ||
| 26 | void ASTCDecode(Image& image, const ImageBufferMap& map, | 26 | void ASTCDecode(Image& image, const StagingBufferMap& map, |
| 27 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 27 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 28 | 28 | ||
| 29 | void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | 29 | void BlockLinearUpload2D(Image& image, const StagingBufferMap& map, |
| 30 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 30 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 31 | 31 | ||
| 32 | void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, | 32 | void BlockLinearUpload3D(Image& image, const StagingBufferMap& map, |
| 33 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 33 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 34 | 34 | ||
| 35 | void PitchUpload(Image& image, const ImageBufferMap& map, | 35 | void PitchUpload(Image& image, const StagingBufferMap& map, |
| 36 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 36 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 37 | 37 | ||
| 38 | void CopyBC4(Image& dst_image, Image& src_image, | 38 | void CopyBC4(Image& dst_image, Image& src_image, |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 794dd0758..92b4f7859 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -157,6 +157,7 @@ struct BufferCacheParams { | |||
| 157 | static constexpr bool USE_MEMORY_MAPS = true; | 157 | static constexpr bool USE_MEMORY_MAPS = true; |
| 158 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; | 158 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; |
| 159 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; | 159 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; |
| 160 | static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true; | ||
| 160 | }; | 161 | }; |
| 161 | 162 | ||
| 162 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 163 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |