summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2021-01-18 19:00:00 -0300
committerGravatar ReinUsesLisp2021-02-13 02:17:24 -0300
commit3da87d3f12d39b9a52625fa9e5e0c5defc0ac440 (patch)
treee7aba23cfda1ab2402c1d35f4c45d6b799523189 /src
parentbuffer_cache: Heuristically detect stream buffers (diff)
downloadyuzu-3da87d3f12d39b9a52625fa9e5e0c5defc0ac440.tar.gz
yuzu-3da87d3f12d39b9a52625fa9e5e0c5defc0ac440.tar.xz
yuzu-3da87d3f12d39b9a52625fa9e5e0c5defc0ac440.zip
gl_buffer_cache: Drop interop-based parameter buffer workarounds
Sacrifice runtime performance to avoid generating kernel exceptions on Windows caused by our abusive aliasing of interop buffer objects.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h4
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp86
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h20
3 files changed, 45 insertions, 65 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 0fff42826..a296036f4 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -91,7 +91,7 @@ class BufferCache {
91 }; 91 };
92 92
93public: 93public:
94 static constexpr size_t SKIP_CACHE_SIZE = 4096; 94 static constexpr u32 SKIP_CACHE_SIZE = 4096;
95 95
96 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, 96 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
97 Tegra::Engines::Maxwell3D& maxwell3d_, 97 Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -671,7 +671,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
671 const VAddr cpu_addr = binding.cpu_addr; 671 const VAddr cpu_addr = binding.cpu_addr;
672 const u32 size = binding.size; 672 const u32 size = binding.size;
673 Buffer& buffer = slot_buffers[binding.buffer_id]; 673 Buffer& buffer = slot_buffers[binding.buffer_id];
674 if (size <= runtime.SkipCacheSize() && !buffer.IsRegionGpuModified(cpu_addr, size)) { 674 if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
675 if constexpr (IS_OPENGL) { 675 if constexpr (IS_OPENGL) {
676 if (runtime.HasFastBufferSubData()) { 676 if (runtime.HasFastBufferSubData()) {
677 // Fast path for Nvidia 677 // Fast path for Nvidia
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 889ad6c56..1e555098d 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -36,13 +36,8 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast
36 buffer.Create(); 36 buffer.Create();
37 const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); 37 const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
38 glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); 38 glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
39 if (runtime.device.UseAssemblyShaders()) { 39 glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
40 CreateMemoryObjects(runtime); 40
41 glNamedBufferStorageMemEXT(buffer.handle, SizeBytes(), memory_commit.ExportOpenGLHandle(),
42 memory_commit.Offset());
43 } else {
44 glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
45 }
46 if (runtime.has_unified_vertex_buffers) { 41 if (runtime.has_unified_vertex_buffers) {
47 glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); 42 glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
48 } 43 }
@@ -71,61 +66,33 @@ void Buffer::MakeResident(GLenum access) noexcept {
71 glMakeNamedBufferResidentNV(buffer.handle, access); 66 glMakeNamedBufferResidentNV(buffer.handle, access);
72} 67}
73 68
74GLuint Buffer::SubBuffer(u32 offset) {
75 if (offset == 0) {
76 return buffer.handle;
77 }
78 for (const auto& [sub_buffer, sub_offset] : subs) {
79 if (sub_offset == offset) {
80 return sub_buffer.handle;
81 }
82 }
83 OGLBuffer sub_buffer;
84 sub_buffer.Create();
85 glNamedBufferStorageMemEXT(sub_buffer.handle, SizeBytes() - offset,
86 memory_commit.ExportOpenGLHandle(), memory_commit.Offset() + offset);
87 return subs.emplace_back(std::move(sub_buffer), offset).first.handle;
88}
89
90void Buffer::CreateMemoryObjects(BufferCacheRuntime& runtime) {
91 auto& allocator = runtime.vulkan_memory_allocator;
92 auto& device = runtime.vulkan_device->GetLogical();
93 auto vulkan_buffer = device.CreateBuffer(VkBufferCreateInfo{
94 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
95 .pNext = nullptr,
96 .flags = 0,
97 .size = SizeBytes(),
98 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
99 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
100 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
101 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
102 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
103 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
104 .queueFamilyIndexCount = 0,
105 .pQueueFamilyIndices = nullptr,
106 });
107 const VkMemoryRequirements requirements = device.GetBufferMemoryRequirements(*vulkan_buffer);
108 memory_commit = allocator->Commit(requirements, Vulkan::MemoryUsage::DeviceLocal);
109}
110
111BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_, 69BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_,
112 Vulkan::MemoryAllocator* vulkan_memory_allocator_) 70 Vulkan::MemoryAllocator* vulkan_memory_allocator_)
113 : device{device_}, vulkan_device{vulkan_device_}, 71 : device{device_}, vulkan_device{vulkan_device_},
114 vulkan_memory_allocator{vulkan_memory_allocator_}, 72 vulkan_memory_allocator{vulkan_memory_allocator_},
115 stream_buffer{device.HasFastBufferSubData() ? std::nullopt 73 has_fast_buffer_sub_data{device.HasFastBufferSubData()},
116 : std::make_optional<StreamBuffer>()} { 74 use_assembly_shaders{device.UseAssemblyShaders()},
75 has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
76 stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
117 GLint gl_max_attributes; 77 GLint gl_max_attributes;
118 glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); 78 glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
119 max_attributes = static_cast<u32>(gl_max_attributes); 79 max_attributes = static_cast<u32>(gl_max_attributes);
120 use_assembly_shaders = device.UseAssemblyShaders();
121 has_unified_vertex_buffers = device.HasVertexBufferUnifiedMemory();
122
123 for (auto& stage_uniforms : fast_uniforms) { 80 for (auto& stage_uniforms : fast_uniforms) {
124 for (OGLBuffer& buffer : stage_uniforms) { 81 for (OGLBuffer& buffer : stage_uniforms) {
125 buffer.Create(); 82 buffer.Create();
126 glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); 83 glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
127 } 84 }
128 } 85 }
86 for (auto& stage_uniforms : copy_uniforms) {
87 for (OGLBuffer& buffer : stage_uniforms) {
88 buffer.Create();
89 glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
90 }
91 }
92 for (OGLBuffer& buffer : copy_compute_uniforms) {
93 buffer.Create();
94 glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
95 }
129} 96}
130 97
131void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, 98void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
@@ -167,8 +134,14 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset,
167void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, 134void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
168 u32 offset, u32 size) { 135 u32 offset, u32 size) {
169 if (use_assembly_shaders) { 136 if (use_assembly_shaders) {
170 const GLuint sub_buffer = buffer.SubBuffer(offset); 137 GLuint handle;
171 glBindBufferRangeNV(PABO_LUT[stage], binding_index, sub_buffer, 0, 138 if (offset != 0) {
139 handle = copy_uniforms[stage][binding_index].handle;
140 glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
141 } else {
142 handle = buffer.Handle();
143 }
144 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
172 static_cast<GLsizeiptr>(size)); 145 static_cast<GLsizeiptr>(size));
173 } else { 146 } else {
174 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; 147 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
@@ -181,8 +154,15 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
181void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, 154void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
182 u32 size) { 155 u32 size) {
183 if (use_assembly_shaders) { 156 if (use_assembly_shaders) {
184 glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, 157 GLuint handle;
185 buffer.SubBuffer(offset), 0, static_cast<GLsizeiptr>(size)); 158 if (offset != 0) {
159 handle = copy_compute_uniforms[binding_index].handle;
160 glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
161 } else {
162 handle = buffer.Handle();
163 }
164 glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0,
165 static_cast<GLsizeiptr>(size));
186 } else { 166 } else {
187 glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(), 167 glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
188 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); 168 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index f4d8871a9..35c9deb51 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -39,8 +39,6 @@ public:
39 39
40 void MakeResident(GLenum access) noexcept; 40 void MakeResident(GLenum access) noexcept;
41 41
42 [[nodiscard]] GLuint SubBuffer(u32 offset);
43
44 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { 42 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
45 return address; 43 return address;
46 } 44 }
@@ -50,13 +48,9 @@ public:
50 } 48 }
51 49
52private: 50private:
53 void CreateMemoryObjects(BufferCacheRuntime& runtime);
54
55 GLuint64EXT address = 0; 51 GLuint64EXT address = 0;
56 Vulkan::MemoryCommit memory_commit;
57 OGLBuffer buffer; 52 OGLBuffer buffer;
58 GLenum current_residency_access = GL_NONE; 53 GLenum current_residency_access = GL_NONE;
59 std::vector<std::pair<OGLBuffer, u32>> subs;
60}; 54};
61 55
62class BufferCacheRuntime { 56class BufferCacheRuntime {
@@ -127,7 +121,7 @@ public:
127 } 121 }
128 122
129 [[nodiscard]] bool HasFastBufferSubData() const noexcept { 123 [[nodiscard]] bool HasFastBufferSubData() const noexcept {
130 return device.HasFastBufferSubData(); 124 return has_fast_buffer_sub_data;
131 } 125 }
132 126
133private: 127private:
@@ -140,16 +134,22 @@ private:
140 const Device& device; 134 const Device& device;
141 const Vulkan::Device* vulkan_device; 135 const Vulkan::Device* vulkan_device;
142 Vulkan::MemoryAllocator* vulkan_memory_allocator; 136 Vulkan::MemoryAllocator* vulkan_memory_allocator;
143 std::optional<StreamBuffer> stream_buffer;
144
145 u32 max_attributes = 0;
146 137
138 bool has_fast_buffer_sub_data = false;
147 bool use_assembly_shaders = false; 139 bool use_assembly_shaders = false;
148 bool has_unified_vertex_buffers = false; 140 bool has_unified_vertex_buffers = false;
149 141
142 u32 max_attributes = 0;
143
144 std::optional<StreamBuffer> stream_buffer;
145
150 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, 146 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
151 VideoCommon::NUM_STAGES> 147 VideoCommon::NUM_STAGES>
152 fast_uniforms; 148 fast_uniforms;
149 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
150 VideoCommon::NUM_STAGES>
151 copy_uniforms;
152 std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
153 153
154 u32 index_buffer_offset = 0; 154 u32 index_buffer_offset = 0;
155}; 155};