summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: ReinUsesLisp <2020-05-11 16:35:04 -0300>
committer: ReinUsesLisp <2020-06-24 02:36:14 -0300>
commit: 32485917ba7cb7b2f0cad766c0897365294650a7 (patch)
tree: 48805f8321d9352203664a2fb28e6e504684b11d /src
parent: gl_device: Expose NV_vertex_buffer_unified_memory except on Turing (diff)
download: yuzu-32485917ba7cb7b2f0cad766c0897365294650a7.tar.gz
download: yuzu-32485917ba7cb7b2f0cad766c0897365294650a7.tar.xz
download: yuzu-32485917ba7cb7b2f0cad766c0897365294650a7.zip
gl_buffer_cache: Mark buffers as resident
Make stream buffer and cached buffers as resident and query their address. This allows us to use GPU addresses for several proprietary Nvidia extensions.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h21
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h20
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp44
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp11
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp31
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h6
10 files changed, 111 insertions, 67 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index bae1d527c..6ea59253a 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -41,7 +41,11 @@ class BufferCache {
41 static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; 41 static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
42 42
43public: 43public:
44 using BufferInfo = std::pair<BufferType, u64>; 44 struct BufferInfo {
45 BufferType handle;
46 u64 offset;
47 u64 address;
48 };
45 49
46 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, 50 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
47 bool is_written = false, bool use_fast_cbuf = false) { 51 bool is_written = false, bool use_fast_cbuf = false) {
@@ -50,7 +54,7 @@ public:
50 auto& memory_manager = system.GPU().MemoryManager(); 54 auto& memory_manager = system.GPU().MemoryManager();
51 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); 55 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
52 if (!cpu_addr_opt) { 56 if (!cpu_addr_opt) {
53 return {GetEmptyBuffer(size), 0}; 57 return GetEmptyBuffer(size);
54 } 58 }
55 const VAddr cpu_addr = *cpu_addr_opt; 59 const VAddr cpu_addr = *cpu_addr_opt;
56 60
@@ -88,7 +92,7 @@ public:
88 Buffer* const block = GetBlock(cpu_addr, size); 92 Buffer* const block = GetBlock(cpu_addr, size);
89 MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); 93 MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
90 if (!map) { 94 if (!map) {
91 return {GetEmptyBuffer(size), 0}; 95 return GetEmptyBuffer(size);
92 } 96 }
93 if (is_written) { 97 if (is_written) {
94 map->MarkAsModified(true, GetModifiedTicks()); 98 map->MarkAsModified(true, GetModifiedTicks());
@@ -101,7 +105,7 @@ public:
101 } 105 }
102 } 106 }
103 107
104 return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))}; 108 return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
105 } 109 }
106 110
107 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. 111 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@@ -254,13 +258,12 @@ public:
254 committed_flushes.pop_front(); 258 committed_flushes.pop_front();
255 } 259 }
256 260
257 virtual BufferType GetEmptyBuffer(std::size_t size) = 0; 261 virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
258 262
259protected: 263protected:
260 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, 264 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
261 std::unique_ptr<StreamBuffer> stream_buffer_) 265 std::unique_ptr<StreamBuffer> stream_buffer)
262 : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)}, 266 : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
263 stream_buffer_handle{stream_buffer->Handle()} {}
264 267
265 ~BufferCache() = default; 268 ~BufferCache() = default;
266 269
@@ -449,7 +452,7 @@ private:
449 452
450 buffer_ptr += size; 453 buffer_ptr += size;
451 buffer_offset += size; 454 buffer_offset += size;
452 return {stream_buffer_handle, uploaded_offset}; 455 return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
453 } 456 }
454 457
455 void AlignBuffer(std::size_t alignment) { 458 void AlignBuffer(std::size_t alignment) {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index ad0577a4f..e09b47f57 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -22,21 +22,28 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
22 22
23MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); 23MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
24 24
25Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { 25Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
26 : VideoCommon::BufferBlock{cpu_addr, size} {
26 gl_buffer.Create(); 27 gl_buffer.Create();
27 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); 28 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
29 if (device.HasVertexBufferUnifiedMemory()) {
30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
31 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
32 }
28} 33}
29 34
30Buffer::~Buffer() = default; 35Buffer::~Buffer() = default;
31 36
32OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 37OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
33 const Device& device, std::size_t stream_size) 38 const Device& device_, std::size_t stream_size)
34 : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { 39 : GenericBufferCache{rasterizer, system,
40 std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
41 device{device_} {
35 if (!device.HasFastBufferSubData()) { 42 if (!device.HasFastBufferSubData()) {
36 return; 43 return;
37 } 44 }
38 45
39 static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); 46 static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
40 glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 47 glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
41 for (const GLuint cbuf : cbufs) { 48 for (const GLuint cbuf : cbufs) {
42 glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); 49 glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
@@ -48,11 +55,11 @@ OGLBufferCache::~OGLBufferCache() {
48} 55}
49 56
50std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { 57std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
51 return std::make_shared<Buffer>(cpu_addr, size); 58 return std::make_shared<Buffer>(device, cpu_addr, size);
52} 59}
53 60
54GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { 61OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
55 return 0; 62 return {0, 0, 0};
56} 63}
57 64
58void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, 65void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
@@ -79,8 +86,9 @@ OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_poi
79 std::size_t size) { 86 std::size_t size) {
80 DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); 87 DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
81 const GLuint cbuf = cbufs[cbuf_cursor++]; 88 const GLuint cbuf = cbufs[cbuf_cursor++];
89
82 glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); 90 glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
83 return {cbuf, 0}; 91 return {cbuf, 0, 0};
84} 92}
85 93
86} // namespace OpenGL 94} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a49aaf9c4..6462cfae5 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -25,15 +25,20 @@ class RasterizerOpenGL;
25 25
26class Buffer : public VideoCommon::BufferBlock { 26class Buffer : public VideoCommon::BufferBlock {
27public: 27public:
28 explicit Buffer(VAddr cpu_addr, const std::size_t size); 28 explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
29 ~Buffer(); 29 ~Buffer();
30 30
31 GLuint Handle() const { 31 GLuint Handle() const noexcept {
32 return gl_buffer.handle; 32 return gl_buffer.handle;
33 } 33 }
34 34
35 u64 Address() const noexcept {
36 return gpu_address;
37 }
38
35private: 39private:
36 OGLBuffer gl_buffer; 40 OGLBuffer gl_buffer;
41 u64 gpu_address = 0;
37}; 42};
38 43
39using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; 44using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
@@ -43,7 +48,7 @@ public:
43 const Device& device, std::size_t stream_size); 48 const Device& device, std::size_t stream_size);
44 ~OGLBufferCache(); 49 ~OGLBufferCache();
45 50
46 GLuint GetEmptyBuffer(std::size_t) override; 51 BufferInfo GetEmptyBuffer(std::size_t) override;
47 52
48 void Acquire() noexcept { 53 void Acquire() noexcept {
49 cbuf_cursor = 0; 54 cbuf_cursor = 0;
@@ -64,10 +69,13 @@ protected:
64 BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; 69 BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
65 70
66private: 71private:
72 static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
73 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
74
75 const Device& device;
76
67 std::size_t cbuf_cursor = 0; 77 std::size_t cbuf_cursor = 0;
68 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * 78 std::array<GLuint, NUM_CBUFS> cbufs{};
69 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
70 cbufs;
71}; 79};
72 80
73} // namespace OpenGL 81} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 2d6c11320..7cb378a71 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -253,8 +253,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
253 glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); 253 glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride);
254 continue; 254 continue;
255 } 255 }
256 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); 256 const auto info = buffer_cache.UploadMemory(start, size);
257 glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, 257 glBindVertexBuffer(static_cast<GLuint>(index), info.handle, info.offset,
258 vertex_array.stride); 258 vertex_array.stride);
259 } 259 }
260} 260}
@@ -285,9 +285,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
285 MICROPROFILE_SCOPE(OpenGL_Index); 285 MICROPROFILE_SCOPE(OpenGL_Index);
286 const auto& regs = system.GPU().Maxwell3D().regs; 286 const auto& regs = system.GPU().Maxwell3D().regs;
287 const std::size_t size = CalculateIndexBufferSize(); 287 const std::size_t size = CalculateIndexBufferSize();
288 const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); 288 const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
289 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); 289 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
290 return offset; 290 return info.offset;
291} 291}
292 292
293void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 293void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
@@ -643,9 +643,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
643 if (!device.UseAssemblyShaders()) { 643 if (!device.UseAssemblyShaders()) {
644 MaxwellUniformData ubo; 644 MaxwellUniformData ubo;
645 ubo.SetFromRegs(gpu); 645 ubo.SetFromRegs(gpu);
646 const auto [buffer, offset] = 646 const auto info =
647 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); 647 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
648 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, 648 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
649 static_cast<GLsizeiptr>(sizeof(ubo))); 649 static_cast<GLsizeiptr>(sizeof(ubo)));
650 } 650 }
651 651
@@ -956,8 +956,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
956 if (device.UseAssemblyShaders()) { 956 if (device.UseAssemblyShaders()) {
957 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); 957 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
958 } else { 958 } else {
959 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 959 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
960 buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
961 } 960 }
962 return; 961 return;
963 } 962 }
@@ -970,24 +969,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
970 969
971 const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); 970 const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
972 const GPUVAddr gpu_addr = buffer.address; 971 const GPUVAddr gpu_addr = buffer.address;
973 auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); 972 auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
974 973
975 if (device.UseAssemblyShaders()) { 974 if (device.UseAssemblyShaders()) {
976 UNIMPLEMENTED_IF(use_unified); 975 UNIMPLEMENTED_IF(use_unified);
977 if (offset != 0) { 976 if (info.offset != 0) {
978 const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; 977 const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
979 glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); 978 glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
980 cbuf = staging_cbuf; 979 info.handle = staging_cbuf;
981 offset = 0; 980 info.offset = 0;
982 } 981 }
983 glBindBufferRangeNV(stage, binding, cbuf, offset, size); 982 glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
984 return; 983 return;
985 } 984 }
986 985
987 if (use_unified) { 986 if (use_unified) {
988 glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); 987 glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
988 unified_offset, size);
989 } else { 989 } else {
990 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); 990 glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
991 } 991 }
992} 992}
993 993
@@ -1023,9 +1023,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
1023void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, 1023void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
1024 GPUVAddr gpu_addr, std::size_t size) { 1024 GPUVAddr gpu_addr, std::size_t size) {
1025 const auto alignment{device.GetShaderStorageBufferAlignment()}; 1025 const auto alignment{device.GetShaderStorageBufferAlignment()};
1026 const auto [ssbo, buffer_offset] = 1026 const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
1027 buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); 1027 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
1028 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
1029 static_cast<GLsizeiptr>(size)); 1028 static_cast<GLsizeiptr>(size));
1030} 1029}
1031 1030
@@ -1712,8 +1711,9 @@ void RasterizerOpenGL::EndTransformFeedback() {
1712 const GLuint handle = transform_feedback_buffers[index].handle; 1711 const GLuint handle = transform_feedback_buffers[index].handle;
1713 const GPUVAddr gpu_addr = binding.Address(); 1712 const GPUVAddr gpu_addr = binding.Address();
1714 const std::size_t size = binding.buffer_size; 1713 const std::size_t size = binding.buffer_size;
1715 const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); 1714 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
1716 glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); 1715 glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
1716 static_cast<GLsizeiptr>(size));
1717 } 1717 }
1718} 1718}
1719 1719
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index aeafcfbfe..164df4feb 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -2,12 +2,13 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <deque> 5#include <tuple>
6#include <vector> 6#include <vector>
7 7
8#include "common/alignment.h" 8#include "common/alignment.h"
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/microprofile.h" 10#include "common/microprofile.h"
11#include "video_core/renderer_opengl/gl_device.h"
11#include "video_core/renderer_opengl/gl_stream_buffer.h" 12#include "video_core/renderer_opengl/gl_stream_buffer.h"
12 13
13MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", 14MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -15,7 +16,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
15 16
16namespace OpenGL { 17namespace OpenGL {
17 18
18OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage) : buffer_size(size) { 19OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
20 : buffer_size(size) {
19 gl_buffer.Create(); 21 gl_buffer.Create();
20 22
21 GLsizeiptr allocate_size = size; 23 GLsizeiptr allocate_size = size;
@@ -32,6 +34,11 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage) : buff
32 glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); 34 glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
33 mapped_ptr = static_cast<u8*>( 35 mapped_ptr = static_cast<u8*>(
34 glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); 36 glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
37
38 if (device.HasVertexBufferUnifiedMemory()) {
39 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
40 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
41 }
35} 42}
36 43
37OGLStreamBuffer::~OGLStreamBuffer() { 44OGLStreamBuffer::~OGLStreamBuffer() {
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 826c2e361..e67a82980 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -11,9 +11,11 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class Device;
15
14class OGLStreamBuffer : private NonCopyable { 16class OGLStreamBuffer : private NonCopyable {
15public: 17public:
16 explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage); 18 explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
17 ~OGLStreamBuffer(); 19 ~OGLStreamBuffer();
18 20
19 /* 21 /*
@@ -32,13 +34,18 @@ public:
32 return gl_buffer.handle; 34 return gl_buffer.handle;
33 } 35 }
34 36
35 GLsizeiptr Size() const { 37 u64 Address() const {
38 return gpu_address;
39 }
40
41 GLsizeiptr Size() const noexcept {
36 return buffer_size; 42 return buffer_size;
37 } 43 }
38 44
39private: 45private:
40 OGLBuffer gl_buffer; 46 OGLBuffer gl_buffer;
41 47
48 GLuint64EXT gpu_address = 0;
42 GLintptr buffer_pos = 0; 49 GLintptr buffer_pos = 0;
43 GLsizeiptr buffer_size = 0; 50 GLsizeiptr buffer_size = 0;
44 GLintptr mapped_offset = 0; 51 GLintptr mapped_offset = 0;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1fde38328..df258d7a4 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -71,14 +71,14 @@ std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t s
71 return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size); 71 return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size);
72} 72}
73 73
74VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { 74VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
75 size = std::max(size, std::size_t(4)); 75 size = std::max(size, std::size_t(4));
76 const auto& empty = staging_pool.GetUnusedBuffer(size, false); 76 const auto& empty = staging_pool.GetUnusedBuffer(size, false);
77 scheduler.RequestOutsideRenderPassOperationContext(); 77 scheduler.RequestOutsideRenderPassOperationContext();
78 scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { 78 scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
79 cmdbuf.FillBuffer(buffer, 0, size, 0); 79 cmdbuf.FillBuffer(buffer, 0, size, 0);
80 }); 80 });
81 return *empty.handle; 81 return {*empty.handle, 0, 0};
82} 82}
83 83
84void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, 84void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 9ebbef835..682383ff2 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -33,6 +33,10 @@ public:
33 return *buffer.handle; 33 return *buffer.handle;
34 } 34 }
35 35
36 u64 Address() const {
37 return 0;
38 }
39
36private: 40private:
37 VKBuffer buffer; 41 VKBuffer buffer;
38}; 42};
@@ -44,7 +48,7 @@ public:
44 VKScheduler& scheduler, VKStagingBufferPool& staging_pool); 48 VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
45 ~VKBufferCache(); 49 ~VKBufferCache();
46 50
47 VkBuffer GetEmptyBuffer(std::size_t size) override; 51 BufferInfo GetEmptyBuffer(std::size_t size) override;
48 52
49protected: 53protected:
50 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; 54 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 29001953c..e3714ee6d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -870,10 +870,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
870 UNIMPLEMENTED_IF(binding.buffer_offset != 0); 870 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
871 871
872 const GPUVAddr gpu_addr = binding.Address(); 872 const GPUVAddr gpu_addr = binding.Address();
873 const auto size = static_cast<VkDeviceSize>(binding.buffer_size); 873 const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
874 const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); 874 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
875 875
876 scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { 876 scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
877 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); 877 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
878 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); 878 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
879 }); 879 });
@@ -925,8 +925,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
925 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); 925 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
926 continue; 926 continue;
927 } 927 }
928 const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); 928 const auto info = buffer_cache.UploadMemory(start, size);
929 buffer_bindings.AddVertexBinding(buffer, offset); 929 buffer_bindings.AddVertexBinding(info.handle, info.offset);
930 } 930 }
931} 931}
932 932
@@ -948,7 +948,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
948 break; 948 break;
949 } 949 }
950 const GPUVAddr gpu_addr = regs.index_array.IndexStart(); 950 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
951 auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); 951 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
952 VkBuffer buffer = info.handle;
953 u64 offset = info.offset;
952 std::tie(buffer, offset) = quad_indexed_pass.Assemble( 954 std::tie(buffer, offset) = quad_indexed_pass.Assemble(
953 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); 955 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
954 956
@@ -962,7 +964,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
962 break; 964 break;
963 } 965 }
964 const GPUVAddr gpu_addr = regs.index_array.IndexStart(); 966 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
965 auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); 967 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
968 VkBuffer buffer = info.handle;
969 u64 offset = info.offset;
966 970
967 auto format = regs.index_array.format; 971 auto format = regs.index_array.format;
968 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; 972 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
@@ -1109,10 +1113,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
1109 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); 1113 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1110 ASSERT(size <= MaxConstbufferSize); 1114 ASSERT(size <= MaxConstbufferSize);
1111 1115
1112 const auto [buffer_handle, offset] = 1116 const auto info =
1113 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); 1117 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
1114 1118 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1115 update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
1116} 1119}
1117 1120
1118void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { 1121void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
@@ -1126,14 +1129,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
1126 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the 1129 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
1127 // default buffer. 1130 // default buffer.
1128 static constexpr std::size_t dummy_size = 4; 1131 static constexpr std::size_t dummy_size = 4;
1129 const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); 1132 const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
1130 update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); 1133 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
1131 return; 1134 return;
1132 } 1135 }
1133 1136
1134 const auto [buffer, offset] = buffer_cache.UploadMemory( 1137 const auto info = buffer_cache.UploadMemory(
1135 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); 1138 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
1136 update_descriptor_queue.AddBuffer(buffer, offset, size); 1139 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1137} 1140}
1138 1141
1139void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, 1142void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index c765c60a0..689f0d276 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -35,10 +35,14 @@ public:
35 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. 35 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
36 void Unmap(u64 size); 36 void Unmap(u64 size);
37 37
38 VkBuffer Handle() const { 38 VkBuffer Handle() const noexcept {
39 return *buffer; 39 return *buffer;
40 } 40 }
41 41
42 u64 Address() const noexcept {
43 return 0;
44 }
45
42private: 46private:
43 struct Watch final { 47 struct Watch final {
44 VKFenceWatch fence; 48 VKFenceWatch fence;