diff options
| author | 2023-12-01 09:16:56 -0500 | |
|---|---|---|
| committer | 2023-12-01 09:16:56 -0500 | |
| commit | 1c21d6c2c2c82bea668f37beec150c6edcde1b81 (patch) | |
| tree | 8c1c0c7d99b91dd705a5fd5477617f434bc42b11 /src/video_core/renderer_opengl | |
| parent | Merge pull request #12234 from abouvier/unbundle-gamemode (diff) | |
| parent | gl_texture_cache: Enable async downloads (diff) | |
| download | yuzu-1c21d6c2c2c82bea668f37beec150c6edcde1b81.tar.gz yuzu-1c21d6c2c2c82bea668f37beec150c6edcde1b81.tar.xz yuzu-1c21d6c2c2c82bea668f37beec150c6edcde1b81.zip | |
Merge pull request #12056 from ameerj/opengl-neglect
OpenGL: Implement async downloads in buffer and texture caches
Diffstat (limited to 'src/video_core/renderer_opengl')
6 files changed, 74 insertions, 44 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index dfd696de6..ed188b435 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
| @@ -146,8 +146,12 @@ StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) { | |||
| 146 | return staging_buffer_pool.RequestUploadBuffer(size); | 146 | return staging_buffer_pool.RequestUploadBuffer(size); |
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) { | 149 | StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { |
| 150 | return staging_buffer_pool.RequestDownloadBuffer(size); | 150 | return staging_buffer_pool.RequestDownloadBuffer(size, deferred); |
| 151 | } | ||
| 152 | |||
| 153 | void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) { | ||
| 154 | staging_buffer_pool.FreeDeferredStagingBuffer(buffer); | ||
| 151 | } | 155 | } |
| 152 | 156 | ||
| 153 | u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { | 157 | u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 000f29a82..1e8708f59 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
| @@ -66,7 +66,9 @@ public: | |||
| 66 | 66 | ||
| 67 | [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size); | 67 | [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size); |
| 68 | 68 | ||
| 69 | [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); | 69 | [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false); |
| 70 | |||
| 71 | void FreeDeferredStagingBuffer(StagingBufferMap& buffer); | ||
| 70 | 72 | ||
| 71 | bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) { | 73 | bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) { |
| 72 | return false; | 74 | return false; |
| @@ -246,7 +248,7 @@ struct BufferCacheParams { | |||
| 246 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | 248 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; |
| 247 | static constexpr bool USE_MEMORY_MAPS = true; | 249 | static constexpr bool USE_MEMORY_MAPS = true; |
| 248 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; | 250 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; |
| 249 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; | 251 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; |
| 250 | 252 | ||
| 251 | // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads | 253 | // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads |
| 252 | static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false; | 254 | static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false; |
diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
index bbb06e51f..cadad6507 100644
--- a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
| @@ -28,63 +28,69 @@ StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) | |||
| 28 | 28 | ||
| 29 | StagingBuffers::~StagingBuffers() = default; | 29 | StagingBuffers::~StagingBuffers() = default; |
| 30 | 30 | ||
| 31 | StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) { | 31 | StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence, |
| 32 | bool deferred) { | ||
| 32 | MICROPROFILE_SCOPE(OpenGL_BufferRequest); | 33 | MICROPROFILE_SCOPE(OpenGL_BufferRequest); |
| 33 | 34 | ||
| 34 | const size_t index = RequestBuffer(requested_size); | 35 | const size_t index = RequestBuffer(requested_size); |
| 35 | OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; | 36 | OGLSync* const sync = insert_fence ? &allocs[index].sync : nullptr; |
| 36 | sync_indices[index] = insert_fence ? ++current_sync_index : 0; | 37 | allocs[index].sync_index = insert_fence ? ++current_sync_index : 0; |
| 38 | allocs[index].deferred = deferred; | ||
| 37 | return StagingBufferMap{ | 39 | return StagingBufferMap{ |
| 38 | .mapped_span = std::span(maps[index], requested_size), | 40 | .mapped_span = std::span(allocs[index].map, requested_size), |
| 39 | .sync = sync, | 41 | .sync = sync, |
| 40 | .buffer = buffers[index].handle, | 42 | .buffer = allocs[index].buffer.handle, |
| 43 | .index = index, | ||
| 41 | }; | 44 | }; |
| 42 | } | 45 | } |
| 43 | 46 | ||
| 47 | void StagingBuffers::FreeDeferredStagingBuffer(size_t index) { | ||
| 48 | ASSERT(allocs[index].deferred); | ||
| 49 | allocs[index].deferred = false; | ||
| 50 | } | ||
| 51 | |||
| 44 | size_t StagingBuffers::RequestBuffer(size_t requested_size) { | 52 | size_t StagingBuffers::RequestBuffer(size_t requested_size) { |
| 45 | if (const std::optional<size_t> index = FindBuffer(requested_size); index) { | 53 | if (const std::optional<size_t> index = FindBuffer(requested_size); index) { |
| 46 | return *index; | 54 | return *index; |
| 47 | } | 55 | } |
| 48 | 56 | StagingBufferAlloc alloc; | |
| 49 | OGLBuffer& buffer = buffers.emplace_back(); | 57 | alloc.buffer.Create(); |
| 50 | buffer.Create(); | ||
| 51 | const auto next_pow2_size = Common::NextPow2(requested_size); | 58 | const auto next_pow2_size = Common::NextPow2(requested_size); |
| 52 | glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr, | 59 | glNamedBufferStorage(alloc.buffer.handle, next_pow2_size, nullptr, |
| 53 | storage_flags | GL_MAP_PERSISTENT_BIT); | 60 | storage_flags | GL_MAP_PERSISTENT_BIT); |
| 54 | maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size, | 61 | alloc.map = static_cast<u8*>(glMapNamedBufferRange(alloc.buffer.handle, 0, next_pow2_size, |
| 55 | map_flags | GL_MAP_PERSISTENT_BIT))); | 62 | map_flags | GL_MAP_PERSISTENT_BIT)); |
| 56 | syncs.emplace_back(); | 63 | alloc.size = next_pow2_size; |
| 57 | sync_indices.emplace_back(); | 64 | allocs.emplace_back(std::move(alloc)); |
| 58 | sizes.push_back(next_pow2_size); | 65 | return allocs.size() - 1; |
| 59 | |||
| 60 | ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && | ||
| 61 | maps.size() == sizes.size()); | ||
| 62 | |||
| 63 | return buffers.size() - 1; | ||
| 64 | } | 66 | } |
| 65 | 67 | ||
| 66 | std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) { | 68 | std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) { |
| 67 | size_t known_unsignaled_index = current_sync_index + 1; | 69 | size_t known_unsignaled_index = current_sync_index + 1; |
| 68 | size_t smallest_buffer = std::numeric_limits<size_t>::max(); | 70 | size_t smallest_buffer = std::numeric_limits<size_t>::max(); |
| 69 | std::optional<size_t> found; | 71 | std::optional<size_t> found; |
| 70 | const size_t num_buffers = sizes.size(); | 72 | const size_t num_buffers = allocs.size(); |
| 71 | for (size_t index = 0; index < num_buffers; ++index) { | 73 | for (size_t index = 0; index < num_buffers; ++index) { |
| 72 | const size_t buffer_size = sizes[index]; | 74 | StagingBufferAlloc& alloc = allocs[index]; |
| 75 | const size_t buffer_size = alloc.size; | ||
| 73 | if (buffer_size < requested_size || buffer_size >= smallest_buffer) { | 76 | if (buffer_size < requested_size || buffer_size >= smallest_buffer) { |
| 74 | continue; | 77 | continue; |
| 75 | } | 78 | } |
| 76 | if (syncs[index].handle != 0) { | 79 | if (alloc.deferred) { |
| 77 | if (sync_indices[index] >= known_unsignaled_index) { | 80 | continue; |
| 81 | } | ||
| 82 | if (alloc.sync.handle != 0) { | ||
| 83 | if (alloc.sync_index >= known_unsignaled_index) { | ||
| 78 | // This fence is later than a fence that is known to not be signaled | 84 | // This fence is later than a fence that is known to not be signaled |
| 79 | continue; | 85 | continue; |
| 80 | } | 86 | } |
| 81 | if (!syncs[index].IsSignaled()) { | 87 | if (!alloc.sync.IsSignaled()) { |
| 82 | // Since this fence hasn't been signaled, it's safe to assume all later | 88 | // Since this fence hasn't been signaled, it's safe to assume all later |
| 83 | // fences haven't been signaled either | 89 | // fences haven't been signaled either |
| 84 | known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]); | 90 | known_unsignaled_index = std::min(known_unsignaled_index, alloc.sync_index); |
| 85 | continue; | 91 | continue; |
| 86 | } | 92 | } |
| 87 | syncs[index].Release(); | 93 | alloc.sync.Release(); |
| 88 | } | 94 | } |
| 89 | smallest_buffer = buffer_size; | 95 | smallest_buffer = buffer_size; |
| 90 | found = index; | 96 | found = index; |
| @@ -143,8 +149,12 @@ StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) { | |||
| 143 | return upload_buffers.RequestMap(size, true); | 149 | return upload_buffers.RequestMap(size, true); |
| 144 | } | 150 | } |
| 145 | 151 | ||
| 146 | StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) { | 152 | StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size, bool deferred) { |
| 147 | return download_buffers.RequestMap(size, false); | 153 | return download_buffers.RequestMap(size, false, deferred); |
| 154 | } | ||
| 155 | |||
| 156 | void StagingBufferPool::FreeDeferredStagingBuffer(StagingBufferMap& buffer) { | ||
| 157 | download_buffers.FreeDeferredStagingBuffer(buffer.index); | ||
| 148 | } | 158 | } |
| 149 | 159 | ||
| 150 | } // namespace OpenGL | 160 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
index 60f72d3a0..07a56b4d2 100644
--- a/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
| @@ -26,23 +26,30 @@ struct StagingBufferMap { | |||
| 26 | size_t offset = 0; | 26 | size_t offset = 0; |
| 27 | OGLSync* sync; | 27 | OGLSync* sync; |
| 28 | GLuint buffer; | 28 | GLuint buffer; |
| 29 | size_t index; | ||
| 29 | }; | 30 | }; |
| 30 | 31 | ||
| 31 | struct StagingBuffers { | 32 | struct StagingBuffers { |
| 32 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); | 33 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); |
| 33 | ~StagingBuffers(); | 34 | ~StagingBuffers(); |
| 34 | 35 | ||
| 35 | StagingBufferMap RequestMap(size_t requested_size, bool insert_fence); | 36 | StagingBufferMap RequestMap(size_t requested_size, bool insert_fence, bool deferred = false); |
| 37 | |||
| 38 | void FreeDeferredStagingBuffer(size_t index); | ||
| 36 | 39 | ||
| 37 | size_t RequestBuffer(size_t requested_size); | 40 | size_t RequestBuffer(size_t requested_size); |
| 38 | 41 | ||
| 39 | std::optional<size_t> FindBuffer(size_t requested_size); | 42 | std::optional<size_t> FindBuffer(size_t requested_size); |
| 40 | 43 | ||
| 41 | std::vector<OGLSync> syncs; | 44 | struct StagingBufferAlloc { |
| 42 | std::vector<OGLBuffer> buffers; | 45 | OGLSync sync; |
| 43 | std::vector<u8*> maps; | 46 | OGLBuffer buffer; |
| 44 | std::vector<size_t> sizes; | 47 | u8* map; |
| 45 | std::vector<size_t> sync_indices; | 48 | size_t size; |
| 49 | size_t sync_index; | ||
| 50 | bool deferred; | ||
| 51 | }; | ||
| 52 | std::vector<StagingBufferAlloc> allocs; | ||
| 46 | GLenum storage_flags; | 53 | GLenum storage_flags; |
| 47 | GLenum map_flags; | 54 | GLenum map_flags; |
| 48 | size_t current_sync_index = 0; | 55 | size_t current_sync_index = 0; |
| @@ -85,7 +92,8 @@ public: | |||
| 85 | ~StagingBufferPool() = default; | 92 | ~StagingBufferPool() = default; |
| 86 | 93 | ||
| 87 | StagingBufferMap RequestUploadBuffer(size_t size); | 94 | StagingBufferMap RequestUploadBuffer(size_t size); |
| 88 | StagingBufferMap RequestDownloadBuffer(size_t size); | 95 | StagingBufferMap RequestDownloadBuffer(size_t size, bool deferred = false); |
| 96 | void FreeDeferredStagingBuffer(StagingBufferMap& buffer); | ||
| 89 | 97 | ||
| 90 | private: | 98 | private: |
| 91 | StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; | 99 | StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 512eef575..66a5ca03e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
| @@ -557,8 +557,12 @@ StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { | |||
| 557 | return staging_buffer_pool.RequestUploadBuffer(size); | 557 | return staging_buffer_pool.RequestUploadBuffer(size); |
| 558 | } | 558 | } |
| 559 | 559 | ||
| 560 | StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { | 560 | StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { |
| 561 | return staging_buffer_pool.RequestDownloadBuffer(size); | 561 | return staging_buffer_pool.RequestDownloadBuffer(size, deferred); |
| 562 | } | ||
| 563 | |||
| 564 | void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) { | ||
| 565 | staging_buffer_pool.FreeDeferredStagingBuffer(buffer); | ||
| 562 | } | 566 | } |
| 563 | 567 | ||
| 564 | u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { | 568 | u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index e71b87e99..34870c81f 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
| @@ -74,7 +74,9 @@ public: | |||
| 74 | 74 | ||
| 75 | StagingBufferMap UploadStagingBuffer(size_t size); | 75 | StagingBufferMap UploadStagingBuffer(size_t size); |
| 76 | 76 | ||
| 77 | StagingBufferMap DownloadStagingBuffer(size_t size); | 77 | StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false); |
| 78 | |||
| 79 | void FreeDeferredStagingBuffer(StagingBufferMap& buffer); | ||
| 78 | 80 | ||
| 79 | u64 GetDeviceLocalMemory() const { | 81 | u64 GetDeviceLocalMemory() const { |
| 80 | return device_access_memory; | 82 | return device_access_memory; |
| @@ -359,7 +361,7 @@ struct TextureCacheParams { | |||
| 359 | static constexpr bool FRAMEBUFFER_BLITS = true; | 361 | static constexpr bool FRAMEBUFFER_BLITS = true; |
| 360 | static constexpr bool HAS_EMULATED_COPIES = true; | 362 | static constexpr bool HAS_EMULATED_COPIES = true; |
| 361 | static constexpr bool HAS_DEVICE_MEMORY_INFO = true; | 363 | static constexpr bool HAS_DEVICE_MEMORY_INFO = true; |
| 362 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; | 364 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; |
| 363 | 365 | ||
| 364 | using Runtime = OpenGL::TextureCacheRuntime; | 366 | using Runtime = OpenGL::TextureCacheRuntime; |
| 365 | using Image = OpenGL::Image; | 367 | using Image = OpenGL::Image; |
| @@ -367,7 +369,7 @@ struct TextureCacheParams { | |||
| 367 | using ImageView = OpenGL::ImageView; | 369 | using ImageView = OpenGL::ImageView; |
| 368 | using Sampler = OpenGL::Sampler; | 370 | using Sampler = OpenGL::Sampler; |
| 369 | using Framebuffer = OpenGL::Framebuffer; | 371 | using Framebuffer = OpenGL::Framebuffer; |
| 370 | using AsyncBuffer = u32; | 372 | using AsyncBuffer = OpenGL::StagingBufferMap; |
| 371 | using BufferType = GLuint; | 373 | using BufferType = GLuint; |
| 372 | }; | 374 | }; |
| 373 | 375 | ||