diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 44 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 62 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 36 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 70 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_stream_buffer.cpp | 56 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_stream_buffer.h | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 89 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.h | 29 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_stream_buffer.h | 6 |
14 files changed, 277 insertions, 213 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index bae1d527c..cf8bdd021 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -41,7 +41,11 @@ class BufferCache { | |||
| 41 | static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; | 41 | static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; |
| 42 | 42 | ||
| 43 | public: | 43 | public: |
| 44 | using BufferInfo = std::pair<BufferType, u64>; | 44 | struct BufferInfo { |
| 45 | BufferType handle; | ||
| 46 | u64 offset; | ||
| 47 | u64 address; | ||
| 48 | }; | ||
| 45 | 49 | ||
| 46 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | 50 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, |
| 47 | bool is_written = false, bool use_fast_cbuf = false) { | 51 | bool is_written = false, bool use_fast_cbuf = false) { |
| @@ -50,7 +54,7 @@ public: | |||
| 50 | auto& memory_manager = system.GPU().MemoryManager(); | 54 | auto& memory_manager = system.GPU().MemoryManager(); |
| 51 | const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); | 55 | const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); |
| 52 | if (!cpu_addr_opt) { | 56 | if (!cpu_addr_opt) { |
| 53 | return {GetEmptyBuffer(size), 0}; | 57 | return GetEmptyBuffer(size); |
| 54 | } | 58 | } |
| 55 | const VAddr cpu_addr = *cpu_addr_opt; | 59 | const VAddr cpu_addr = *cpu_addr_opt; |
| 56 | 60 | ||
| @@ -88,7 +92,7 @@ public: | |||
| 88 | Buffer* const block = GetBlock(cpu_addr, size); | 92 | Buffer* const block = GetBlock(cpu_addr, size); |
| 89 | MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); | 93 | MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); |
| 90 | if (!map) { | 94 | if (!map) { |
| 91 | return {GetEmptyBuffer(size), 0}; | 95 | return GetEmptyBuffer(size); |
| 92 | } | 96 | } |
| 93 | if (is_written) { | 97 | if (is_written) { |
| 94 | map->MarkAsModified(true, GetModifiedTicks()); | 98 | map->MarkAsModified(true, GetModifiedTicks()); |
| @@ -101,7 +105,7 @@ public: | |||
| 101 | } | 105 | } |
| 102 | } | 106 | } |
| 103 | 107 | ||
| 104 | return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))}; | 108 | return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()}; |
| 105 | } | 109 | } |
| 106 | 110 | ||
| 107 | /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. | 111 | /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. |
| @@ -254,27 +258,17 @@ public: | |||
| 254 | committed_flushes.pop_front(); | 258 | committed_flushes.pop_front(); |
| 255 | } | 259 | } |
| 256 | 260 | ||
| 257 | virtual BufferType GetEmptyBuffer(std::size_t size) = 0; | 261 | virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; |
| 258 | 262 | ||
| 259 | protected: | 263 | protected: |
| 260 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | 264 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
| 261 | std::unique_ptr<StreamBuffer> stream_buffer_) | 265 | std::unique_ptr<StreamBuffer> stream_buffer) |
| 262 | : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)}, | 266 | : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {} |
| 263 | stream_buffer_handle{stream_buffer->Handle()} {} | ||
| 264 | 267 | ||
| 265 | ~BufferCache() = default; | 268 | ~BufferCache() = default; |
| 266 | 269 | ||
| 267 | virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; | 270 | virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; |
| 268 | 271 | ||
| 269 | virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 270 | const u8* data) = 0; | ||
| 271 | |||
| 272 | virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 273 | u8* data) = 0; | ||
| 274 | |||
| 275 | virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | ||
| 276 | std::size_t dst_offset, std::size_t size) = 0; | ||
| 277 | |||
| 278 | virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { | 272 | virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { |
| 279 | return {}; | 273 | return {}; |
| 280 | } | 274 | } |
| @@ -336,11 +330,11 @@ private: | |||
| 336 | const VAddr cpu_addr_end = cpu_addr + size; | 330 | const VAddr cpu_addr_end = cpu_addr + size; |
| 337 | if (memory_manager.IsGranularRange(gpu_addr, size)) { | 331 | if (memory_manager.IsGranularRange(gpu_addr, size)) { |
| 338 | u8* host_ptr = memory_manager.GetPointer(gpu_addr); | 332 | u8* host_ptr = memory_manager.GetPointer(gpu_addr); |
| 339 | UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr); | 333 | block->Upload(block->Offset(cpu_addr), size, host_ptr); |
| 340 | } else { | 334 | } else { |
| 341 | staging_buffer.resize(size); | 335 | staging_buffer.resize(size); |
| 342 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | 336 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); |
| 343 | UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data()); | 337 | block->Upload(block->Offset(cpu_addr), size, staging_buffer.data()); |
| 344 | } | 338 | } |
| 345 | return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); | 339 | return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); |
| 346 | } | 340 | } |
| @@ -399,7 +393,7 @@ private: | |||
| 399 | } | 393 | } |
| 400 | staging_buffer.resize(size); | 394 | staging_buffer.resize(size); |
| 401 | system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); | 395 | system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); |
| 402 | UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data()); | 396 | block->Upload(block->Offset(interval.lower()), size, staging_buffer.data()); |
| 403 | } | 397 | } |
| 404 | } | 398 | } |
| 405 | 399 | ||
| @@ -436,7 +430,7 @@ private: | |||
| 436 | 430 | ||
| 437 | const std::size_t size = map->end - map->start; | 431 | const std::size_t size = map->end - map->start; |
| 438 | staging_buffer.resize(size); | 432 | staging_buffer.resize(size); |
| 439 | DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data()); | 433 | block->Download(block->Offset(map->start), size, staging_buffer.data()); |
| 440 | system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); | 434 | system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); |
| 441 | map->MarkAsModified(false, 0); | 435 | map->MarkAsModified(false, 0); |
| 442 | } | 436 | } |
| @@ -449,7 +443,7 @@ private: | |||
| 449 | 443 | ||
| 450 | buffer_ptr += size; | 444 | buffer_ptr += size; |
| 451 | buffer_offset += size; | 445 | buffer_offset += size; |
| 452 | return {stream_buffer_handle, uploaded_offset}; | 446 | return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; |
| 453 | } | 447 | } |
| 454 | 448 | ||
| 455 | void AlignBuffer(std::size_t alignment) { | 449 | void AlignBuffer(std::size_t alignment) { |
| @@ -464,7 +458,7 @@ private: | |||
| 464 | const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; | 458 | const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; |
| 465 | const VAddr cpu_addr = buffer->CpuAddr(); | 459 | const VAddr cpu_addr = buffer->CpuAddr(); |
| 466 | std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); | 460 | std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); |
| 467 | CopyBlock(*buffer, *new_buffer, 0, 0, old_size); | 461 | new_buffer->CopyFrom(*buffer, 0, 0, old_size); |
| 468 | QueueDestruction(std::move(buffer)); | 462 | QueueDestruction(std::move(buffer)); |
| 469 | 463 | ||
| 470 | const VAddr cpu_addr_end = cpu_addr + new_size - 1; | 464 | const VAddr cpu_addr_end = cpu_addr + new_size - 1; |
| @@ -486,8 +480,8 @@ private: | |||
| 486 | const std::size_t new_size = size_1 + size_2; | 480 | const std::size_t new_size = size_1 + size_2; |
| 487 | 481 | ||
| 488 | std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size); | 482 | std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size); |
| 489 | CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1); | 483 | new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1); |
| 490 | CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2); | 484 | new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2); |
| 491 | QueueDestruction(std::move(first)); | 485 | QueueDestruction(std::move(first)); |
| 492 | QueueDestruction(std::move(second)); | 486 | QueueDestruction(std::move(second)); |
| 493 | 487 | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index ad0577a4f..d9f7b4cc6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -22,21 +22,46 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 22 | 22 | ||
| 23 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | 23 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); |
| 24 | 24 | ||
| 25 | Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { | 25 | Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) |
| 26 | : VideoCommon::BufferBlock{cpu_addr, size} { | ||
| 26 | gl_buffer.Create(); | 27 | gl_buffer.Create(); |
| 27 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | 28 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); |
| 29 | if (device.HasVertexBufferUnifiedMemory()) { | ||
| 30 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); | ||
| 31 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); | ||
| 32 | } | ||
| 28 | } | 33 | } |
| 29 | 34 | ||
| 30 | Buffer::~Buffer() = default; | 35 | Buffer::~Buffer() = default; |
| 31 | 36 | ||
| 37 | void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const { | ||
| 38 | glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), | ||
| 39 | data); | ||
| 40 | } | ||
| 41 | |||
| 42 | void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { | ||
| 43 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | ||
| 44 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||
| 45 | glGetNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), | ||
| 46 | data); | ||
| 47 | } | ||
| 48 | |||
| 49 | void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | ||
| 50 | std::size_t size) const { | ||
| 51 | glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), | ||
| 52 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); | ||
| 53 | } | ||
| 54 | |||
| 32 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, | 55 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 33 | const Device& device, std::size_t stream_size) | 56 | const Device& device_, std::size_t stream_size) |
| 34 | : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { | 57 | : GenericBufferCache{rasterizer, system, |
| 58 | std::make_unique<OGLStreamBuffer>(device_, stream_size, true)}, | ||
| 59 | device{device_} { | ||
| 35 | if (!device.HasFastBufferSubData()) { | 60 | if (!device.HasFastBufferSubData()) { |
| 36 | return; | 61 | return; |
| 37 | } | 62 | } |
| 38 | 63 | ||
| 39 | static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); | 64 | static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); |
| 40 | glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | 65 | glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); |
| 41 | for (const GLuint cbuf : cbufs) { | 66 | for (const GLuint cbuf : cbufs) { |
| 42 | glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); | 67 | glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); |
| @@ -48,39 +73,20 @@ OGLBufferCache::~OGLBufferCache() { | |||
| 48 | } | 73 | } |
| 49 | 74 | ||
| 50 | std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | 75 | std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { |
| 51 | return std::make_shared<Buffer>(cpu_addr, size); | 76 | return std::make_shared<Buffer>(device, cpu_addr, size); |
| 52 | } | 77 | } |
| 53 | 78 | ||
| 54 | GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { | 79 | OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { |
| 55 | return 0; | 80 | return {0, 0, 0}; |
| 56 | } | ||
| 57 | |||
| 58 | void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 59 | const u8* data) { | ||
| 60 | glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), | ||
| 61 | static_cast<GLsizeiptr>(size), data); | ||
| 62 | } | ||
| 63 | |||
| 64 | void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 65 | u8* data) { | ||
| 66 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | ||
| 67 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||
| 68 | glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), | ||
| 69 | static_cast<GLsizeiptr>(size), data); | ||
| 70 | } | ||
| 71 | |||
| 72 | void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | ||
| 73 | std::size_t dst_offset, std::size_t size) { | ||
| 74 | glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset), | ||
| 75 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); | ||
| 76 | } | 81 | } |
| 77 | 82 | ||
| 78 | OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, | 83 | OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, |
| 79 | std::size_t size) { | 84 | std::size_t size) { |
| 80 | DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); | 85 | DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); |
| 81 | const GLuint cbuf = cbufs[cbuf_cursor++]; | 86 | const GLuint cbuf = cbufs[cbuf_cursor++]; |
| 87 | |||
| 82 | glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); | 88 | glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); |
| 83 | return {cbuf, 0}; | 89 | return {cbuf, 0, 0}; |
| 84 | } | 90 | } |
| 85 | 91 | ||
| 86 | } // namespace OpenGL | 92 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a49aaf9c4..59d95adbc 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -25,15 +25,27 @@ class RasterizerOpenGL; | |||
| 25 | 25 | ||
| 26 | class Buffer : public VideoCommon::BufferBlock { | 26 | class Buffer : public VideoCommon::BufferBlock { |
| 27 | public: | 27 | public: |
| 28 | explicit Buffer(VAddr cpu_addr, const std::size_t size); | 28 | explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); |
| 29 | ~Buffer(); | 29 | ~Buffer(); |
| 30 | 30 | ||
| 31 | GLuint Handle() const { | 31 | void Upload(std::size_t offset, std::size_t size, const u8* data) const; |
| 32 | |||
| 33 | void Download(std::size_t offset, std::size_t size, u8* data) const; | ||
| 34 | |||
| 35 | void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | ||
| 36 | std::size_t size) const; | ||
| 37 | |||
| 38 | GLuint Handle() const noexcept { | ||
| 32 | return gl_buffer.handle; | 39 | return gl_buffer.handle; |
| 33 | } | 40 | } |
| 34 | 41 | ||
| 42 | u64 Address() const noexcept { | ||
| 43 | return gpu_address; | ||
| 44 | } | ||
| 45 | |||
| 35 | private: | 46 | private: |
| 36 | OGLBuffer gl_buffer; | 47 | OGLBuffer gl_buffer; |
| 48 | u64 gpu_address = 0; | ||
| 37 | }; | 49 | }; |
| 38 | 50 | ||
| 39 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | 51 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; |
| @@ -43,7 +55,7 @@ public: | |||
| 43 | const Device& device, std::size_t stream_size); | 55 | const Device& device, std::size_t stream_size); |
| 44 | ~OGLBufferCache(); | 56 | ~OGLBufferCache(); |
| 45 | 57 | ||
| 46 | GLuint GetEmptyBuffer(std::size_t) override; | 58 | BufferInfo GetEmptyBuffer(std::size_t) override; |
| 47 | 59 | ||
| 48 | void Acquire() noexcept { | 60 | void Acquire() noexcept { |
| 49 | cbuf_cursor = 0; | 61 | cbuf_cursor = 0; |
| @@ -52,22 +64,16 @@ public: | |||
| 52 | protected: | 64 | protected: |
| 53 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | 65 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 54 | 66 | ||
| 55 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 56 | const u8* data) override; | ||
| 57 | |||
| 58 | void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 59 | u8* data) override; | ||
| 60 | |||
| 61 | void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | ||
| 62 | std::size_t dst_offset, std::size_t size) override; | ||
| 63 | |||
| 64 | BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; | 67 | BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; |
| 65 | 68 | ||
| 66 | private: | 69 | private: |
| 70 | static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | ||
| 71 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | ||
| 72 | |||
| 73 | const Device& device; | ||
| 74 | |||
| 67 | std::size_t cbuf_cursor = 0; | 75 | std::size_t cbuf_cursor = 0; |
| 68 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | 76 | std::array<GLuint, NUM_CBUFS> cbufs{}; |
| 69 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram> | ||
| 70 | cbufs; | ||
| 71 | }; | 77 | }; |
| 72 | 78 | ||
| 73 | } // namespace OpenGL | 79 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 1011c7738..447a19595 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -188,16 +188,32 @@ bool IsASTCSupported() { | |||
| 188 | return true; | 188 | return true; |
| 189 | } | 189 | } |
| 190 | 190 | ||
| 191 | /// @brief Returns true when a GL_RENDERER is a Turing GPU | ||
| 192 | /// @param renderer GL_RENDERER string | ||
| 193 | bool IsTuring(std::string_view renderer) { | ||
| 194 | static constexpr std::array<std::string_view, 12> TURING_GPUS = { | ||
| 195 | "GTX 1650", "GTX 1660", "RTX 2060", "RTX 2070", | ||
| 196 | "RTX 2080", "TITAN RTX", "Quadro RTX 3000", "Quadro RTX 4000", | ||
| 197 | "Quadro RTX 5000", "Quadro RTX 6000", "Quadro RTX 8000", "Tesla T4", | ||
| 198 | }; | ||
| 199 | return std::any_of(TURING_GPUS.begin(), TURING_GPUS.end(), | ||
| 200 | [renderer](std::string_view candidate) { | ||
| 201 | return renderer.find(candidate) != std::string_view::npos; | ||
| 202 | }); | ||
| 203 | } | ||
| 204 | |||
| 191 | } // Anonymous namespace | 205 | } // Anonymous namespace |
| 192 | 206 | ||
| 193 | Device::Device() | 207 | Device::Device() |
| 194 | : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { | 208 | : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { |
| 195 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); | 209 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); |
| 210 | const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); | ||
| 196 | const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); | 211 | const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); |
| 197 | const std::vector extensions = GetExtensions(); | 212 | const std::vector extensions = GetExtensions(); |
| 198 | 213 | ||
| 199 | const bool is_nvidia = vendor == "NVIDIA Corporation"; | 214 | const bool is_nvidia = vendor == "NVIDIA Corporation"; |
| 200 | const bool is_amd = vendor == "ATI Technologies Inc."; | 215 | const bool is_amd = vendor == "ATI Technologies Inc."; |
| 216 | const bool is_turing = is_nvidia && IsTuring(renderer); | ||
| 201 | 217 | ||
| 202 | bool disable_fast_buffer_sub_data = false; | 218 | bool disable_fast_buffer_sub_data = false; |
| 203 | if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { | 219 | if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { |
| @@ -221,8 +237,16 @@ Device::Device() | |||
| 221 | has_variable_aoffi = TestVariableAoffi(); | 237 | has_variable_aoffi = TestVariableAoffi(); |
| 222 | has_component_indexing_bug = is_amd; | 238 | has_component_indexing_bug = is_amd; |
| 223 | has_precise_bug = TestPreciseBug(); | 239 | has_precise_bug = TestPreciseBug(); |
| 224 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; | ||
| 225 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | 240 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; |
| 241 | |||
| 242 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive | ||
| 243 | // uniform buffers as "push constants" | ||
| 244 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; | ||
| 245 | |||
| 246 | // Nvidia's driver on Turing GPUs randomly crashes when the buffer is made resident, or on | ||
| 247 | // DeleteBuffers. Disable unified memory on these devices. | ||
| 248 | has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory && !is_turing; | ||
| 249 | |||
| 226 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | 250 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && |
| 227 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && | 251 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && |
| 228 | GLAD_GL_NV_transform_feedback2; | 252 | GLAD_GL_NV_transform_feedback2; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index c86e709b1..e1d811966 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -72,6 +72,10 @@ public: | |||
| 72 | return has_texture_shadow_lod; | 72 | return has_texture_shadow_lod; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | bool HasVertexBufferUnifiedMemory() const { | ||
| 76 | return has_vertex_buffer_unified_memory; | ||
| 77 | } | ||
| 78 | |||
| 75 | bool HasASTC() const { | 79 | bool HasASTC() const { |
| 76 | return has_astc; | 80 | return has_astc; |
| 77 | } | 81 | } |
| @@ -115,6 +119,7 @@ private: | |||
| 115 | bool has_vertex_viewport_layer{}; | 119 | bool has_vertex_viewport_layer{}; |
| 116 | bool has_image_load_formatted{}; | 120 | bool has_image_load_formatted{}; |
| 117 | bool has_texture_shadow_lod{}; | 121 | bool has_texture_shadow_lod{}; |
| 122 | bool has_vertex_buffer_unified_memory{}; | ||
| 118 | bool has_astc{}; | 123 | bool has_astc{}; |
| 119 | bool has_variable_aoffi{}; | 124 | bool has_variable_aoffi{}; |
| 120 | bool has_component_indexing_bug{}; | 125 | bool has_component_indexing_bug{}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d6c11320..362457ffe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -61,7 +61,8 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = | |||
| 61 | constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = | 61 | constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = |
| 62 | NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; | 62 | NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; |
| 63 | 63 | ||
| 64 | constexpr std::size_t NumSupportedVertexAttributes = 16; | 64 | constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; |
| 65 | constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; | ||
| 65 | 66 | ||
| 66 | template <typename Engine, typename Entry> | 67 | template <typename Engine, typename Entry> |
| 67 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 68 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, |
| @@ -193,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() { | |||
| 193 | // avoid OpenGL errors. | 194 | // avoid OpenGL errors. |
| 194 | // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't | 195 | // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't |
| 195 | // assume every shader uses them all. | 196 | // assume every shader uses them all. |
| 196 | for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { | 197 | for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { |
| 197 | if (!flags[Dirty::VertexFormat0 + index]) { | 198 | if (!flags[Dirty::VertexFormat0 + index]) { |
| 198 | continue; | 199 | continue; |
| 199 | } | 200 | } |
| @@ -231,9 +232,11 @@ void RasterizerOpenGL::SetupVertexBuffer() { | |||
| 231 | 232 | ||
| 232 | MICROPROFILE_SCOPE(OpenGL_VB); | 233 | MICROPROFILE_SCOPE(OpenGL_VB); |
| 233 | 234 | ||
| 235 | const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); | ||
| 236 | |||
| 234 | // Upload all guest vertex arrays sequentially to our buffer | 237 | // Upload all guest vertex arrays sequentially to our buffer |
| 235 | const auto& regs = gpu.regs; | 238 | const auto& regs = gpu.regs; |
| 236 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | 239 | for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { |
| 237 | if (!flags[Dirty::VertexBuffer0 + index]) { | 240 | if (!flags[Dirty::VertexBuffer0 + index]) { |
| 238 | continue; | 241 | continue; |
| 239 | } | 242 | } |
| @@ -246,16 +249,25 @@ void RasterizerOpenGL::SetupVertexBuffer() { | |||
| 246 | 249 | ||
| 247 | const GPUVAddr start = vertex_array.StartAddress(); | 250 | const GPUVAddr start = vertex_array.StartAddress(); |
| 248 | const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | 251 | const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); |
| 249 | |||
| 250 | ASSERT(end >= start); | 252 | ASSERT(end >= start); |
| 253 | |||
| 254 | const GLuint gl_index = static_cast<GLuint>(index); | ||
| 251 | const u64 size = end - start; | 255 | const u64 size = end - start; |
| 252 | if (size == 0) { | 256 | if (size == 0) { |
| 253 | glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); | 257 | glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); |
| 258 | if (use_unified_memory) { | ||
| 259 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0); | ||
| 260 | } | ||
| 254 | continue; | 261 | continue; |
| 255 | } | 262 | } |
| 256 | const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); | 263 | const auto info = buffer_cache.UploadMemory(start, size); |
| 257 | glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, | 264 | if (use_unified_memory) { |
| 258 | vertex_array.stride); | 265 | glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); |
| 266 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, | ||
| 267 | info.address + info.offset, size); | ||
| 268 | } else { | ||
| 269 | glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride); | ||
| 270 | } | ||
| 259 | } | 271 | } |
| 260 | } | 272 | } |
| 261 | 273 | ||
| @@ -268,7 +280,7 @@ void RasterizerOpenGL::SetupVertexInstances() { | |||
| 268 | flags[Dirty::VertexInstances] = false; | 280 | flags[Dirty::VertexInstances] = false; |
| 269 | 281 | ||
| 270 | const auto& regs = gpu.regs; | 282 | const auto& regs = gpu.regs; |
| 271 | for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { | 283 | for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { |
| 272 | if (!flags[Dirty::VertexInstance0 + index]) { | 284 | if (!flags[Dirty::VertexInstance0 + index]) { |
| 273 | continue; | 285 | continue; |
| 274 | } | 286 | } |
| @@ -285,9 +297,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { | |||
| 285 | MICROPROFILE_SCOPE(OpenGL_Index); | 297 | MICROPROFILE_SCOPE(OpenGL_Index); |
| 286 | const auto& regs = system.GPU().Maxwell3D().regs; | 298 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 287 | const std::size_t size = CalculateIndexBufferSize(); | 299 | const std::size_t size = CalculateIndexBufferSize(); |
| 288 | const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); | 300 | const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); |
| 289 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); | 301 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); |
| 290 | return offset; | 302 | return info.offset; |
| 291 | } | 303 | } |
| 292 | 304 | ||
| 293 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | 305 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { |
| @@ -643,9 +655,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 643 | if (!device.UseAssemblyShaders()) { | 655 | if (!device.UseAssemblyShaders()) { |
| 644 | MaxwellUniformData ubo; | 656 | MaxwellUniformData ubo; |
| 645 | ubo.SetFromRegs(gpu); | 657 | ubo.SetFromRegs(gpu); |
| 646 | const auto [buffer, offset] = | 658 | const auto info = |
| 647 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | 659 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); |
| 648 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, | 660 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, |
| 649 | static_cast<GLsizeiptr>(sizeof(ubo))); | 661 | static_cast<GLsizeiptr>(sizeof(ubo))); |
| 650 | } | 662 | } |
| 651 | 663 | ||
| @@ -956,8 +968,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, | |||
| 956 | if (device.UseAssemblyShaders()) { | 968 | if (device.UseAssemblyShaders()) { |
| 957 | glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); | 969 | glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); |
| 958 | } else { | 970 | } else { |
| 959 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, | 971 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); |
| 960 | buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); | ||
| 961 | } | 972 | } |
| 962 | return; | 973 | return; |
| 963 | } | 974 | } |
| @@ -970,24 +981,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, | |||
| 970 | 981 | ||
| 971 | const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); | 982 | const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); |
| 972 | const GPUVAddr gpu_addr = buffer.address; | 983 | const GPUVAddr gpu_addr = buffer.address; |
| 973 | auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); | 984 | auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); |
| 974 | 985 | ||
| 975 | if (device.UseAssemblyShaders()) { | 986 | if (device.UseAssemblyShaders()) { |
| 976 | UNIMPLEMENTED_IF(use_unified); | 987 | UNIMPLEMENTED_IF(use_unified); |
| 977 | if (offset != 0) { | 988 | if (info.offset != 0) { |
| 978 | const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; | 989 | const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; |
| 979 | glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); | 990 | glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); |
| 980 | cbuf = staging_cbuf; | 991 | info.handle = staging_cbuf; |
| 981 | offset = 0; | 992 | info.offset = 0; |
| 982 | } | 993 | } |
| 983 | glBindBufferRangeNV(stage, binding, cbuf, offset, size); | 994 | glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); |
| 984 | return; | 995 | return; |
| 985 | } | 996 | } |
| 986 | 997 | ||
| 987 | if (use_unified) { | 998 | if (use_unified) { |
| 988 | glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); | 999 | glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, |
| 1000 | unified_offset, size); | ||
| 989 | } else { | 1001 | } else { |
| 990 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); | 1002 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); |
| 991 | } | 1003 | } |
| 992 | } | 1004 | } |
| 993 | 1005 | ||
| @@ -1023,9 +1035,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { | |||
| 1023 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, | 1035 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, |
| 1024 | GPUVAddr gpu_addr, std::size_t size) { | 1036 | GPUVAddr gpu_addr, std::size_t size) { |
| 1025 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | 1037 | const auto alignment{device.GetShaderStorageBufferAlignment()}; |
| 1026 | const auto [ssbo, buffer_offset] = | 1038 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); |
| 1027 | buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); | 1039 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, |
| 1028 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, | ||
| 1029 | static_cast<GLsizeiptr>(size)); | 1040 | static_cast<GLsizeiptr>(size)); |
| 1030 | } | 1041 | } |
| 1031 | 1042 | ||
| @@ -1712,8 +1723,9 @@ void RasterizerOpenGL::EndTransformFeedback() { | |||
| 1712 | const GLuint handle = transform_feedback_buffers[index].handle; | 1723 | const GLuint handle = transform_feedback_buffers[index].handle; |
| 1713 | const GPUVAddr gpu_addr = binding.Address(); | 1724 | const GPUVAddr gpu_addr = binding.Address(); |
| 1714 | const std::size_t size = binding.buffer_size; | 1725 | const std::size_t size = binding.buffer_size; |
| 1715 | const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | 1726 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); |
| 1716 | glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); | 1727 | glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, |
| 1728 | static_cast<GLsizeiptr>(size)); | ||
| 1717 | } | 1729 | } |
| 1718 | } | 1730 | } |
| 1719 | 1731 | ||
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 932a2f69e..3655ff629 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -2,11 +2,13 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <deque> | 5 | #include <tuple> |
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | |||
| 7 | #include "common/alignment.h" | 8 | #include "common/alignment.h" |
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 9 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| 11 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 12 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 11 | 13 | ||
| 12 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | 14 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", |
| @@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | |||
| 14 | 16 | ||
| 15 | namespace OpenGL { | 17 | namespace OpenGL { |
| 16 | 18 | ||
| 17 | OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, | 19 | OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) |
| 18 | bool use_persistent) | ||
| 19 | : buffer_size(size) { | 20 | : buffer_size(size) { |
| 20 | gl_buffer.Create(); | 21 | gl_buffer.Create(); |
| 21 | 22 | ||
| @@ -29,23 +30,19 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p | |||
| 29 | allocate_size *= 2; | 30 | allocate_size *= 2; |
| 30 | } | 31 | } |
| 31 | 32 | ||
| 32 | if (use_persistent) { | 33 | static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; |
| 33 | persistent = true; | 34 | glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); |
| 34 | coherent = prefer_coherent; | 35 | mapped_ptr = static_cast<u8*>( |
| 35 | const GLbitfield flags = | 36 | glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); |
| 36 | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); | 37 | |
| 37 | glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); | 38 | if (device.HasVertexBufferUnifiedMemory()) { |
| 38 | mapped_ptr = static_cast<u8*>(glMapNamedBufferRange( | 39 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); |
| 39 | gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); | 40 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); |
| 40 | } else { | ||
| 41 | glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW); | ||
| 42 | } | 41 | } |
| 43 | } | 42 | } |
| 44 | 43 | ||
| 45 | OGLStreamBuffer::~OGLStreamBuffer() { | 44 | OGLStreamBuffer::~OGLStreamBuffer() { |
| 46 | if (persistent) { | 45 | glUnmapNamedBuffer(gl_buffer.handle); |
| 47 | glUnmapNamedBuffer(gl_buffer.handle); | ||
| 48 | } | ||
| 49 | gl_buffer.Release(); | 46 | gl_buffer.Release(); |
| 50 | } | 47 | } |
| 51 | 48 | ||
| @@ -60,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a | |||
| 60 | 57 | ||
| 61 | bool invalidate = false; | 58 | bool invalidate = false; |
| 62 | if (buffer_pos + size > buffer_size) { | 59 | if (buffer_pos + size > buffer_size) { |
| 60 | MICROPROFILE_SCOPE(OpenGL_StreamBuffer); | ||
| 61 | glInvalidateBufferData(gl_buffer.handle); | ||
| 62 | |||
| 63 | buffer_pos = 0; | 63 | buffer_pos = 0; |
| 64 | invalidate = true; | 64 | invalidate = true; |
| 65 | |||
| 66 | if (persistent) { | ||
| 67 | glUnmapNamedBuffer(gl_buffer.handle); | ||
| 68 | } | ||
| 69 | } | 65 | } |
| 70 | 66 | ||
| 71 | if (invalidate || !persistent) { | 67 | return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); |
| 72 | MICROPROFILE_SCOPE(OpenGL_StreamBuffer); | ||
| 73 | GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | | ||
| 74 | (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | | ||
| 75 | (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); | ||
| 76 | mapped_ptr = static_cast<u8*>( | ||
| 77 | glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags)); | ||
| 78 | mapped_offset = buffer_pos; | ||
| 79 | } | ||
| 80 | |||
| 81 | return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); | ||
| 82 | } | 68 | } |
| 83 | 69 | ||
| 84 | void OGLStreamBuffer::Unmap(GLsizeiptr size) { | 70 | void OGLStreamBuffer::Unmap(GLsizeiptr size) { |
| 85 | ASSERT(size <= mapped_size); | 71 | ASSERT(size <= mapped_size); |
| 86 | 72 | ||
| 87 | if (!coherent && size > 0) { | 73 | if (size > 0) { |
| 88 | glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); | 74 | glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size); |
| 89 | } | ||
| 90 | |||
| 91 | if (!persistent) { | ||
| 92 | glUnmapNamedBuffer(gl_buffer.handle); | ||
| 93 | } | 75 | } |
| 94 | 76 | ||
| 95 | buffer_pos += size; | 77 | buffer_pos += size; |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 866da3594..307a67113 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -11,10 +11,11 @@ | |||
| 11 | 11 | ||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | class Device; | ||
| 15 | |||
| 14 | class OGLStreamBuffer : private NonCopyable { | 16 | class OGLStreamBuffer : private NonCopyable { |
| 15 | public: | 17 | public: |
| 16 | explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, | 18 | explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); |
| 17 | bool use_persistent = true); | ||
| 18 | ~OGLStreamBuffer(); | 19 | ~OGLStreamBuffer(); |
| 19 | 20 | ||
| 20 | /* | 21 | /* |
| @@ -33,19 +34,20 @@ public: | |||
| 33 | return gl_buffer.handle; | 34 | return gl_buffer.handle; |
| 34 | } | 35 | } |
| 35 | 36 | ||
| 36 | GLsizeiptr Size() const { | 37 | u64 Address() const { |
| 38 | return gpu_address; | ||
| 39 | } | ||
| 40 | |||
| 41 | GLsizeiptr Size() const noexcept { | ||
| 37 | return buffer_size; | 42 | return buffer_size; |
| 38 | } | 43 | } |
| 39 | 44 | ||
| 40 | private: | 45 | private: |
| 41 | OGLBuffer gl_buffer; | 46 | OGLBuffer gl_buffer; |
| 42 | 47 | ||
| 43 | bool coherent = false; | 48 | GLuint64EXT gpu_address = 0; |
| 44 | bool persistent = false; | ||
| 45 | |||
| 46 | GLintptr buffer_pos = 0; | 49 | GLintptr buffer_pos = 0; |
| 47 | GLsizeiptr buffer_size = 0; | 50 | GLsizeiptr buffer_size = 0; |
| 48 | GLintptr mapped_offset = 0; | ||
| 49 | GLsizeiptr mapped_size = 0; | 51 | GLsizeiptr mapped_size = 0; |
| 50 | u8* mapped_ptr = nullptr; | 52 | u8* mapped_ptr = nullptr; |
| 51 | }; | 53 | }; |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6214fcbc3..c40adb6e7 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 488 | 488 | ||
| 489 | // Clear screen to black | 489 | // Clear screen to black |
| 490 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); | 490 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); |
| 491 | |||
| 492 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it | ||
| 493 | if (device.HasVertexBufferUnifiedMemory()) { | ||
| 494 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | ||
| 495 | |||
| 496 | glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); | ||
| 497 | glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, | ||
| 498 | &vertex_buffer_address); | ||
| 499 | } | ||
| 491 | } | 500 | } |
| 492 | 501 | ||
| 493 | void RendererOpenGL::AddTelemetryFields() { | 502 | void RendererOpenGL::AddTelemetryFields() { |
| @@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 656 | offsetof(ScreenRectVertex, tex_coord)); | 665 | offsetof(ScreenRectVertex, tex_coord)); |
| 657 | glVertexAttribBinding(PositionLocation, 0); | 666 | glVertexAttribBinding(PositionLocation, 0); |
| 658 | glVertexAttribBinding(TexCoordLocation, 0); | 667 | glVertexAttribBinding(TexCoordLocation, 0); |
| 659 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); | 668 | if (device.HasVertexBufferUnifiedMemory()) { |
| 669 | glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); | ||
| 670 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, | ||
| 671 | sizeof(vertices)); | ||
| 672 | } else { | ||
| 673 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); | ||
| 674 | } | ||
| 660 | 675 | ||
| 661 | glBindTextureUnit(0, screen_info.display_texture); | 676 | glBindTextureUnit(0, screen_info.display_texture); |
| 662 | glBindSampler(0, 0); | 677 | glBindSampler(0, 0); |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 61bf507f4..8b18d32e6 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -107,6 +107,9 @@ private: | |||
| 107 | OGLPipeline pipeline; | 107 | OGLPipeline pipeline; |
| 108 | OGLFramebuffer screenshot_framebuffer; | 108 | OGLFramebuffer screenshot_framebuffer; |
| 109 | 109 | ||
| 110 | // GPU address of the vertex buffer | ||
| 111 | GLuint64EXT vertex_buffer_address = 0; | ||
| 112 | |||
| 110 | /// Display information for Switch screen | 113 | /// Display information for Switch screen |
| 111 | ScreenInfo screen_info; | 114 | ScreenInfo screen_info; |
| 112 | 115 | ||
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1fde38328..f10f96cd8 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch | |||
| 37 | 37 | ||
| 38 | } // Anonymous namespace | 38 | } // Anonymous namespace |
| 39 | 39 | ||
| 40 | Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, | 40 | Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, |
| 41 | std::size_t size) | 41 | VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) |
| 42 | : VideoCommon::BufferBlock{cpu_addr, size} { | 42 | : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { |
| 43 | VkBufferCreateInfo ci; | 43 | VkBufferCreateInfo ci; |
| 44 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; | 44 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; |
| 45 | ci.pNext = nullptr; | 45 | ci.pNext = nullptr; |
| @@ -56,40 +56,15 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cp | |||
| 56 | 56 | ||
| 57 | Buffer::~Buffer() = default; | 57 | Buffer::~Buffer() = default; |
| 58 | 58 | ||
| 59 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | 59 | void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const { |
| 60 | const VKDevice& device, VKMemoryManager& memory_manager, | ||
| 61 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool) | ||
| 62 | : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, | ||
| 63 | CreateStreamBuffer(device, | ||
| 64 | scheduler)}, | ||
| 65 | device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ | ||
| 66 | staging_pool} {} | ||
| 67 | |||
| 68 | VKBufferCache::~VKBufferCache() = default; | ||
| 69 | |||
| 70 | std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | ||
| 71 | return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size); | ||
| 72 | } | ||
| 73 | |||
| 74 | VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { | ||
| 75 | size = std::max(size, std::size_t(4)); | ||
| 76 | const auto& empty = staging_pool.GetUnusedBuffer(size, false); | ||
| 77 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 78 | scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { | ||
| 79 | cmdbuf.FillBuffer(buffer, 0, size, 0); | ||
| 80 | }); | ||
| 81 | return *empty.handle; | ||
| 82 | } | ||
| 83 | |||
| 84 | void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 85 | const u8* data) { | ||
| 86 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); | 60 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); |
| 87 | std::memcpy(staging.commit->Map(size), data, size); | 61 | std::memcpy(staging.commit->Map(size), data, size); |
| 88 | 62 | ||
| 89 | scheduler.RequestOutsideRenderPassOperationContext(); | 63 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 90 | scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, | 64 | |
| 91 | size](vk::CommandBuffer cmdbuf) { | 65 | const VkBuffer handle = Handle(); |
| 92 | cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); | 66 | scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { |
| 67 | cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); | ||
| 93 | 68 | ||
| 94 | VkBufferMemoryBarrier barrier; | 69 | VkBufferMemoryBarrier barrier; |
| 95 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | 70 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| @@ -98,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st | |||
| 98 | barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; | 73 | barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; |
| 99 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | 74 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 100 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | 75 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 101 | barrier.buffer = buffer; | 76 | barrier.buffer = handle; |
| 102 | barrier.offset = offset; | 77 | barrier.offset = offset; |
| 103 | barrier.size = size; | 78 | barrier.size = size; |
| 104 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, | 79 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, |
| @@ -106,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st | |||
| 106 | }); | 81 | }); |
| 107 | } | 82 | } |
| 108 | 83 | ||
| 109 | void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 84 | void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { |
| 110 | u8* data) { | ||
| 111 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); | 85 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); |
| 112 | scheduler.RequestOutsideRenderPassOperationContext(); | 86 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 113 | scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, | 87 | |
| 114 | size](vk::CommandBuffer cmdbuf) { | 88 | const VkBuffer handle = Handle(); |
| 89 | scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { | ||
| 115 | VkBufferMemoryBarrier barrier; | 90 | VkBufferMemoryBarrier barrier; |
| 116 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | 91 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| 117 | barrier.pNext = nullptr; | 92 | barrier.pNext = nullptr; |
| @@ -119,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, | |||
| 119 | barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; | 94 | barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; |
| 120 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | 95 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 121 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | 96 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 122 | barrier.buffer = buffer; | 97 | barrier.buffer = handle; |
| 123 | barrier.offset = offset; | 98 | barrier.offset = offset; |
| 124 | barrier.size = size; | 99 | barrier.size = size; |
| 125 | 100 | ||
| @@ -127,17 +102,19 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, | |||
| 127 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | | 102 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | |
| 128 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 103 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 129 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); | 104 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); |
| 130 | cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); | 105 | cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size}); |
| 131 | }); | 106 | }); |
| 132 | scheduler.Finish(); | 107 | scheduler.Finish(); |
| 133 | 108 | ||
| 134 | std::memcpy(data, staging.commit->Map(size), size); | 109 | std::memcpy(data, staging.commit->Map(size), size); |
| 135 | } | 110 | } |
| 136 | 111 | ||
| 137 | void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | 112 | void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, |
| 138 | std::size_t dst_offset, std::size_t size) { | 113 | std::size_t size) const { |
| 139 | scheduler.RequestOutsideRenderPassOperationContext(); | 114 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 140 | scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset, | 115 | |
| 116 | const VkBuffer dst_buffer = Handle(); | ||
| 117 | scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, | ||
| 141 | size](vk::CommandBuffer cmdbuf) { | 118 | size](vk::CommandBuffer cmdbuf) { |
| 142 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); | 119 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); |
| 143 | 120 | ||
| @@ -165,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t | |||
| 165 | }); | 142 | }); |
| 166 | } | 143 | } |
| 167 | 144 | ||
| 145 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | ||
| 146 | const VKDevice& device, VKMemoryManager& memory_manager, | ||
| 147 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool) | ||
| 148 | : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, | ||
| 149 | CreateStreamBuffer(device, | ||
| 150 | scheduler)}, | ||
| 151 | device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ | ||
| 152 | staging_pool} {} | ||
| 153 | |||
| 154 | VKBufferCache::~VKBufferCache() = default; | ||
| 155 | |||
| 156 | std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | ||
| 157 | return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr, | ||
| 158 | size); | ||
| 159 | } | ||
| 160 | |||
| 161 | VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { | ||
| 162 | size = std::max(size, std::size_t(4)); | ||
| 163 | const auto& empty = staging_pool.GetUnusedBuffer(size, false); | ||
| 164 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 165 | scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { | ||
| 166 | cmdbuf.FillBuffer(buffer, 0, size, 0); | ||
| 167 | }); | ||
| 168 | return {*empty.handle, 0, 0}; | ||
| 169 | } | ||
| 170 | |||
| 168 | } // namespace Vulkan | 171 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 9ebbef835..3630aca77 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -25,15 +25,29 @@ class VKScheduler; | |||
| 25 | 25 | ||
| 26 | class Buffer final : public VideoCommon::BufferBlock { | 26 | class Buffer final : public VideoCommon::BufferBlock { |
| 27 | public: | 27 | public: |
| 28 | explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, | 28 | explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, |
| 29 | std::size_t size); | 29 | VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size); |
| 30 | ~Buffer(); | 30 | ~Buffer(); |
| 31 | 31 | ||
| 32 | void Upload(std::size_t offset, std::size_t size, const u8* data) const; | ||
| 33 | |||
| 34 | void Download(std::size_t offset, std::size_t size, u8* data) const; | ||
| 35 | |||
| 36 | void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | ||
| 37 | std::size_t size) const; | ||
| 38 | |||
| 32 | VkBuffer Handle() const { | 39 | VkBuffer Handle() const { |
| 33 | return *buffer.handle; | 40 | return *buffer.handle; |
| 34 | } | 41 | } |
| 35 | 42 | ||
| 43 | u64 Address() const { | ||
| 44 | return 0; | ||
| 45 | } | ||
| 46 | |||
| 36 | private: | 47 | private: |
| 48 | VKScheduler& scheduler; | ||
| 49 | VKStagingBufferPool& staging_pool; | ||
| 50 | |||
| 37 | VKBuffer buffer; | 51 | VKBuffer buffer; |
| 38 | }; | 52 | }; |
| 39 | 53 | ||
| @@ -44,20 +58,11 @@ public: | |||
| 44 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool); | 58 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool); |
| 45 | ~VKBufferCache(); | 59 | ~VKBufferCache(); |
| 46 | 60 | ||
| 47 | VkBuffer GetEmptyBuffer(std::size_t size) override; | 61 | BufferInfo GetEmptyBuffer(std::size_t size) override; |
| 48 | 62 | ||
| 49 | protected: | 63 | protected: |
| 50 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | 64 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 51 | 65 | ||
| 52 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 53 | const u8* data) override; | ||
| 54 | |||
| 55 | void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 56 | u8* data) override; | ||
| 57 | |||
| 58 | void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | ||
| 59 | std::size_t dst_offset, std::size_t size) override; | ||
| 60 | |||
| 61 | private: | 66 | private: |
| 62 | const VKDevice& device; | 67 | const VKDevice& device; |
| 63 | VKMemoryManager& memory_manager; | 68 | VKMemoryManager& memory_manager; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 29001953c..e3714ee6d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -870,10 +870,10 @@ void RasterizerVulkan::BeginTransformFeedback() { | |||
| 870 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | 870 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); |
| 871 | 871 | ||
| 872 | const GPUVAddr gpu_addr = binding.Address(); | 872 | const GPUVAddr gpu_addr = binding.Address(); |
| 873 | const auto size = static_cast<VkDeviceSize>(binding.buffer_size); | 873 | const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size); |
| 874 | const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | 874 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); |
| 875 | 875 | ||
| 876 | scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { | 876 | scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) { |
| 877 | cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); | 877 | cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); |
| 878 | cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); | 878 | cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); |
| 879 | }); | 879 | }); |
| @@ -925,8 +925,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex | |||
| 925 | buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); | 925 | buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); |
| 926 | continue; | 926 | continue; |
| 927 | } | 927 | } |
| 928 | const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); | 928 | const auto info = buffer_cache.UploadMemory(start, size); |
| 929 | buffer_bindings.AddVertexBinding(buffer, offset); | 929 | buffer_bindings.AddVertexBinding(info.handle, info.offset); |
| 930 | } | 930 | } |
| 931 | } | 931 | } |
| 932 | 932 | ||
| @@ -948,7 +948,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar | |||
| 948 | break; | 948 | break; |
| 949 | } | 949 | } |
| 950 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | 950 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); |
| 951 | auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | 951 | const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); |
| 952 | VkBuffer buffer = info.handle; | ||
| 953 | u64 offset = info.offset; | ||
| 952 | std::tie(buffer, offset) = quad_indexed_pass.Assemble( | 954 | std::tie(buffer, offset) = quad_indexed_pass.Assemble( |
| 953 | regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); | 955 | regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); |
| 954 | 956 | ||
| @@ -962,7 +964,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar | |||
| 962 | break; | 964 | break; |
| 963 | } | 965 | } |
| 964 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | 966 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); |
| 965 | auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | 967 | const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); |
| 968 | VkBuffer buffer = info.handle; | ||
| 969 | u64 offset = info.offset; | ||
| 966 | 970 | ||
| 967 | auto format = regs.index_array.format; | 971 | auto format = regs.index_array.format; |
| 968 | const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; | 972 | const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; |
| @@ -1109,10 +1113,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | |||
| 1109 | Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | 1113 | Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); |
| 1110 | ASSERT(size <= MaxConstbufferSize); | 1114 | ASSERT(size <= MaxConstbufferSize); |
| 1111 | 1115 | ||
| 1112 | const auto [buffer_handle, offset] = | 1116 | const auto info = |
| 1113 | buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); | 1117 | buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); |
| 1114 | 1118 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | |
| 1115 | update_descriptor_queue.AddBuffer(buffer_handle, offset, size); | ||
| 1116 | } | 1119 | } |
| 1117 | 1120 | ||
| 1118 | void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { | 1121 | void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { |
| @@ -1126,14 +1129,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd | |||
| 1126 | // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the | 1129 | // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the |
| 1127 | // default buffer. | 1130 | // default buffer. |
| 1128 | static constexpr std::size_t dummy_size = 4; | 1131 | static constexpr std::size_t dummy_size = 4; |
| 1129 | const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); | 1132 | const auto info = buffer_cache.GetEmptyBuffer(dummy_size); |
| 1130 | update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); | 1133 | update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); |
| 1131 | return; | 1134 | return; |
| 1132 | } | 1135 | } |
| 1133 | 1136 | ||
| 1134 | const auto [buffer, offset] = buffer_cache.UploadMemory( | 1137 | const auto info = buffer_cache.UploadMemory( |
| 1135 | actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); | 1138 | actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); |
| 1136 | update_descriptor_queue.AddBuffer(buffer, offset, size); | 1139 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); |
| 1137 | } | 1140 | } |
| 1138 | 1141 | ||
| 1139 | void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, | 1142 | void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index c765c60a0..689f0d276 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -35,10 +35,14 @@ public: | |||
| 35 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | 35 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. |
| 36 | void Unmap(u64 size); | 36 | void Unmap(u64 size); |
| 37 | 37 | ||
| 38 | VkBuffer Handle() const { | 38 | VkBuffer Handle() const noexcept { |
| 39 | return *buffer; | 39 | return *buffer; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | u64 Address() const noexcept { | ||
| 43 | return 0; | ||
| 44 | } | ||
| 45 | |||
| 42 | private: | 46 | private: |
| 43 | struct Watch final { | 47 | struct Watch final { |
| 44 | VKFenceWatch fence; | 48 | VKFenceWatch fence; |