diff options
| author | 2021-01-18 19:00:00 -0300 | |
|---|---|---|
| committer | 2021-02-13 02:17:24 -0300 | |
| commit | 3da87d3f12d39b9a52625fa9e5e0c5defc0ac440 (patch) | |
| tree | e7aba23cfda1ab2402c1d35f4c45d6b799523189 /src | |
| parent | buffer_cache: Heuristically detect stream buffers (diff) | |
| download | yuzu-3da87d3f12d39b9a52625fa9e5e0c5defc0ac440.tar.gz yuzu-3da87d3f12d39b9a52625fa9e5e0c5defc0ac440.tar.xz yuzu-3da87d3f12d39b9a52625fa9e5e0c5defc0ac440.zip | |
gl_buffer_cache: Drop interop based parameter buffer workarounds
Sacrifice runtime performance to avoid generating kernel exceptions on
Windows due to our abusive aliasing of interop buffer objects.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 86 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 20 |
3 files changed, 45 insertions, 65 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0fff42826..a296036f4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -91,7 +91,7 @@ class BufferCache { | |||
| 91 | }; | 91 | }; |
| 92 | 92 | ||
| 93 | public: | 93 | public: |
| 94 | static constexpr size_t SKIP_CACHE_SIZE = 4096; | 94 | static constexpr u32 SKIP_CACHE_SIZE = 4096; |
| 95 | 95 | ||
| 96 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 96 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 97 | Tegra::Engines::Maxwell3D& maxwell3d_, | 97 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| @@ -671,7 +671,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 671 | const VAddr cpu_addr = binding.cpu_addr; | 671 | const VAddr cpu_addr = binding.cpu_addr; |
| 672 | const u32 size = binding.size; | 672 | const u32 size = binding.size; |
| 673 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 673 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 674 | if (size <= runtime.SkipCacheSize() && !buffer.IsRegionGpuModified(cpu_addr, size)) { | 674 | if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { |
| 675 | if constexpr (IS_OPENGL) { | 675 | if constexpr (IS_OPENGL) { |
| 676 | if (runtime.HasFastBufferSubData()) { | 676 | if (runtime.HasFastBufferSubData()) { |
| 677 | // Fast path for Nvidia | 677 | // Fast path for Nvidia |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 889ad6c56..1e555098d 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -36,13 +36,8 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast | |||
| 36 | buffer.Create(); | 36 | buffer.Create(); |
| 37 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); | 37 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); |
| 38 | glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); | 38 | glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); |
| 39 | if (runtime.device.UseAssemblyShaders()) { | 39 | glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); |
| 40 | CreateMemoryObjects(runtime); | 40 | |
| 41 | glNamedBufferStorageMemEXT(buffer.handle, SizeBytes(), memory_commit.ExportOpenGLHandle(), | ||
| 42 | memory_commit.Offset()); | ||
| 43 | } else { | ||
| 44 | glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); | ||
| 45 | } | ||
| 46 | if (runtime.has_unified_vertex_buffers) { | 41 | if (runtime.has_unified_vertex_buffers) { |
| 47 | glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); | 42 | glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); |
| 48 | } | 43 | } |
| @@ -71,61 +66,33 @@ void Buffer::MakeResident(GLenum access) noexcept { | |||
| 71 | glMakeNamedBufferResidentNV(buffer.handle, access); | 66 | glMakeNamedBufferResidentNV(buffer.handle, access); |
| 72 | } | 67 | } |
| 73 | 68 | ||
| 74 | GLuint Buffer::SubBuffer(u32 offset) { | ||
| 75 | if (offset == 0) { | ||
| 76 | return buffer.handle; | ||
| 77 | } | ||
| 78 | for (const auto& [sub_buffer, sub_offset] : subs) { | ||
| 79 | if (sub_offset == offset) { | ||
| 80 | return sub_buffer.handle; | ||
| 81 | } | ||
| 82 | } | ||
| 83 | OGLBuffer sub_buffer; | ||
| 84 | sub_buffer.Create(); | ||
| 85 | glNamedBufferStorageMemEXT(sub_buffer.handle, SizeBytes() - offset, | ||
| 86 | memory_commit.ExportOpenGLHandle(), memory_commit.Offset() + offset); | ||
| 87 | return subs.emplace_back(std::move(sub_buffer), offset).first.handle; | ||
| 88 | } | ||
| 89 | |||
| 90 | void Buffer::CreateMemoryObjects(BufferCacheRuntime& runtime) { | ||
| 91 | auto& allocator = runtime.vulkan_memory_allocator; | ||
| 92 | auto& device = runtime.vulkan_device->GetLogical(); | ||
| 93 | auto vulkan_buffer = device.CreateBuffer(VkBufferCreateInfo{ | ||
| 94 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 95 | .pNext = nullptr, | ||
| 96 | .flags = 0, | ||
| 97 | .size = SizeBytes(), | ||
| 98 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||
| 99 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | ||
| 100 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||
| 101 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | ||
| 102 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, | ||
| 103 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 104 | .queueFamilyIndexCount = 0, | ||
| 105 | .pQueueFamilyIndices = nullptr, | ||
| 106 | }); | ||
| 107 | const VkMemoryRequirements requirements = device.GetBufferMemoryRequirements(*vulkan_buffer); | ||
| 108 | memory_commit = allocator->Commit(requirements, Vulkan::MemoryUsage::DeviceLocal); | ||
| 109 | } | ||
| 110 | |||
| 111 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_, | 69 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_, |
| 112 | Vulkan::MemoryAllocator* vulkan_memory_allocator_) | 70 | Vulkan::MemoryAllocator* vulkan_memory_allocator_) |
| 113 | : device{device_}, vulkan_device{vulkan_device_}, | 71 | : device{device_}, vulkan_device{vulkan_device_}, |
| 114 | vulkan_memory_allocator{vulkan_memory_allocator_}, | 72 | vulkan_memory_allocator{vulkan_memory_allocator_}, |
| 115 | stream_buffer{device.HasFastBufferSubData() ? std::nullopt | 73 | has_fast_buffer_sub_data{device.HasFastBufferSubData()}, |
| 116 | : std::make_optional<StreamBuffer>()} { | 74 | use_assembly_shaders{device.UseAssemblyShaders()}, |
| 75 | has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, | ||
| 76 | stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { | ||
| 117 | GLint gl_max_attributes; | 77 | GLint gl_max_attributes; |
| 118 | glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); | 78 | glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); |
| 119 | max_attributes = static_cast<u32>(gl_max_attributes); | 79 | max_attributes = static_cast<u32>(gl_max_attributes); |
| 120 | use_assembly_shaders = device.UseAssemblyShaders(); | ||
| 121 | has_unified_vertex_buffers = device.HasVertexBufferUnifiedMemory(); | ||
| 122 | |||
| 123 | for (auto& stage_uniforms : fast_uniforms) { | 80 | for (auto& stage_uniforms : fast_uniforms) { |
| 124 | for (OGLBuffer& buffer : stage_uniforms) { | 81 | for (OGLBuffer& buffer : stage_uniforms) { |
| 125 | buffer.Create(); | 82 | buffer.Create(); |
| 126 | glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); | 83 | glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); |
| 127 | } | 84 | } |
| 128 | } | 85 | } |
| 86 | for (auto& stage_uniforms : copy_uniforms) { | ||
| 87 | for (OGLBuffer& buffer : stage_uniforms) { | ||
| 88 | buffer.Create(); | ||
| 89 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||
| 90 | } | ||
| 91 | } | ||
| 92 | for (OGLBuffer& buffer : copy_compute_uniforms) { | ||
| 93 | buffer.Create(); | ||
| 94 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||
| 95 | } | ||
| 129 | } | 96 | } |
| 130 | 97 | ||
| 131 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | 98 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, |
| @@ -167,8 +134,14 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, | |||
| 167 | void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, | 134 | void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, |
| 168 | u32 offset, u32 size) { | 135 | u32 offset, u32 size) { |
| 169 | if (use_assembly_shaders) { | 136 | if (use_assembly_shaders) { |
| 170 | const GLuint sub_buffer = buffer.SubBuffer(offset); | 137 | GLuint handle; |
| 171 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, sub_buffer, 0, | 138 | if (offset != 0) { |
| 139 | handle = copy_uniforms[stage][binding_index].handle; | ||
| 140 | glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); | ||
| 141 | } else { | ||
| 142 | handle = buffer.Handle(); | ||
| 143 | } | ||
| 144 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, | ||
| 172 | static_cast<GLsizeiptr>(size)); | 145 | static_cast<GLsizeiptr>(size)); |
| 173 | } else { | 146 | } else { |
| 174 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | 147 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; |
| @@ -181,8 +154,15 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff | |||
| 181 | void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, | 154 | void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, |
| 182 | u32 size) { | 155 | u32 size) { |
| 183 | if (use_assembly_shaders) { | 156 | if (use_assembly_shaders) { |
| 184 | glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, | 157 | GLuint handle; |
| 185 | buffer.SubBuffer(offset), 0, static_cast<GLsizeiptr>(size)); | 158 | if (offset != 0) { |
| 159 | handle = copy_compute_uniforms[binding_index].handle; | ||
| 160 | glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); | ||
| 161 | } else { | ||
| 162 | handle = buffer.Handle(); | ||
| 163 | } | ||
| 164 | glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0, | ||
| 165 | static_cast<GLsizeiptr>(size)); | ||
| 186 | } else { | 166 | } else { |
| 187 | glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(), | 167 | glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(), |
| 188 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | 168 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f4d8871a9..35c9deb51 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -39,8 +39,6 @@ public: | |||
| 39 | 39 | ||
| 40 | void MakeResident(GLenum access) noexcept; | 40 | void MakeResident(GLenum access) noexcept; |
| 41 | 41 | ||
| 42 | [[nodiscard]] GLuint SubBuffer(u32 offset); | ||
| 43 | |||
| 44 | [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { | 42 | [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { |
| 45 | return address; | 43 | return address; |
| 46 | } | 44 | } |
| @@ -50,13 +48,9 @@ public: | |||
| 50 | } | 48 | } |
| 51 | 49 | ||
| 52 | private: | 50 | private: |
| 53 | void CreateMemoryObjects(BufferCacheRuntime& runtime); | ||
| 54 | |||
| 55 | GLuint64EXT address = 0; | 51 | GLuint64EXT address = 0; |
| 56 | Vulkan::MemoryCommit memory_commit; | ||
| 57 | OGLBuffer buffer; | 52 | OGLBuffer buffer; |
| 58 | GLenum current_residency_access = GL_NONE; | 53 | GLenum current_residency_access = GL_NONE; |
| 59 | std::vector<std::pair<OGLBuffer, u32>> subs; | ||
| 60 | }; | 54 | }; |
| 61 | 55 | ||
| 62 | class BufferCacheRuntime { | 56 | class BufferCacheRuntime { |
| @@ -127,7 +121,7 @@ public: | |||
| 127 | } | 121 | } |
| 128 | 122 | ||
| 129 | [[nodiscard]] bool HasFastBufferSubData() const noexcept { | 123 | [[nodiscard]] bool HasFastBufferSubData() const noexcept { |
| 130 | return device.HasFastBufferSubData(); | 124 | return has_fast_buffer_sub_data; |
| 131 | } | 125 | } |
| 132 | 126 | ||
| 133 | private: | 127 | private: |
| @@ -140,16 +134,22 @@ private: | |||
| 140 | const Device& device; | 134 | const Device& device; |
| 141 | const Vulkan::Device* vulkan_device; | 135 | const Vulkan::Device* vulkan_device; |
| 142 | Vulkan::MemoryAllocator* vulkan_memory_allocator; | 136 | Vulkan::MemoryAllocator* vulkan_memory_allocator; |
| 143 | std::optional<StreamBuffer> stream_buffer; | ||
| 144 | |||
| 145 | u32 max_attributes = 0; | ||
| 146 | 137 | ||
| 138 | bool has_fast_buffer_sub_data = false; | ||
| 147 | bool use_assembly_shaders = false; | 139 | bool use_assembly_shaders = false; |
| 148 | bool has_unified_vertex_buffers = false; | 140 | bool has_unified_vertex_buffers = false; |
| 149 | 141 | ||
| 142 | u32 max_attributes = 0; | ||
| 143 | |||
| 144 | std::optional<StreamBuffer> stream_buffer; | ||
| 145 | |||
| 150 | std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | 146 | std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, |
| 151 | VideoCommon::NUM_STAGES> | 147 | VideoCommon::NUM_STAGES> |
| 152 | fast_uniforms; | 148 | fast_uniforms; |
| 149 | std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | ||
| 150 | VideoCommon::NUM_STAGES> | ||
| 151 | copy_uniforms; | ||
| 152 | std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms; | ||
| 153 | 153 | ||
| 154 | u32 index_buffer_offset = 0; | 154 | u32 index_buffer_offset = 0; |
| 155 | }; | 155 | }; |