diff options
| author | 2023-11-19 17:17:16 -0500 | |
|---|---|---|
| committer | 2023-11-19 17:17:16 -0500 | |
| commit | a595ed499d3a21a251c3376c0c34a589c32088a9 (patch) | |
| tree | 9a0c90aa176447c7ee876d76a674b303e3e66d9a /src | |
| parent | Merge pull request #12036 from FernandoS27/you-should-have-more-than-one-towel (diff) | |
| download | yuzu-a595ed499d3a21a251c3376c0c34a589c32088a9.tar.gz yuzu-a595ed499d3a21a251c3376c0c34a589c32088a9.tar.xz yuzu-a595ed499d3a21a251c3376c0c34a589c32088a9.zip | |
gl_buffer_cache: Batch vertex/tfb buffer binding
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 76 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 17 |
5 files changed, 36 insertions, 64 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 38d553d3c..9d5209e97 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -53,13 +53,11 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast | |||
| 53 | VAddr cpu_addr_, u64 size_bytes_) | 53 | VAddr cpu_addr_, u64 size_bytes_) |
| 54 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | 54 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { |
| 55 | buffer.Create(); | 55 | buffer.Create(); |
| 56 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); | 56 | if (runtime.device.HasDebuggingToolAttached()) { |
| 57 | glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); | 57 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); |
| 58 | glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); | 58 | glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); |
| 59 | |||
| 60 | if (runtime.has_unified_vertex_buffers) { | ||
| 61 | glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); | ||
| 62 | } | 59 | } |
| 60 | glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); | ||
| 63 | } | 61 | } |
| 64 | 62 | ||
| 65 | void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept { | 63 | void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept { |
| @@ -111,7 +109,6 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, | |||
| 111 | : device{device_}, staging_buffer_pool{staging_buffer_pool_}, | 109 | : device{device_}, staging_buffer_pool{staging_buffer_pool_}, |
| 112 | has_fast_buffer_sub_data{device.HasFastBufferSubData()}, | 110 | has_fast_buffer_sub_data{device.HasFastBufferSubData()}, |
| 113 | use_assembly_shaders{device.UseAssemblyShaders()}, | 111 | use_assembly_shaders{device.UseAssemblyShaders()}, |
| 114 | has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, | ||
| 115 | stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { | 112 | stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { |
| 116 | GLint gl_max_attributes; | 113 | GLint gl_max_attributes; |
| 117 | glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); | 114 | glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); |
| @@ -123,16 +120,18 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, | |||
| 123 | GL_STREAM_DRAW); | 120 | GL_STREAM_DRAW); |
| 124 | } | 121 | } |
| 125 | } | 122 | } |
| 126 | for (auto& stage_uniforms : copy_uniforms) { | 123 | if (use_assembly_shaders) { |
| 127 | for (OGLBuffer& buffer : stage_uniforms) { | 124 | for (auto& stage_uniforms : copy_uniforms) { |
| 125 | for (OGLBuffer& buffer : stage_uniforms) { | ||
| 126 | buffer.Create(); | ||
| 127 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||
| 128 | } | ||
| 129 | } | ||
| 130 | for (OGLBuffer& buffer : copy_compute_uniforms) { | ||
| 128 | buffer.Create(); | 131 | buffer.Create(); |
| 129 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | 132 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); |
| 130 | } | 133 | } |
| 131 | } | 134 | } |
| 132 | for (OGLBuffer& buffer : copy_compute_uniforms) { | ||
| 133 | buffer.Create(); | ||
| 134 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||
| 135 | } | ||
| 136 | 135 | ||
| 137 | device_access_memory = [this]() -> u64 { | 136 | device_access_memory = [this]() -> u64 { |
| 138 | if (device.CanReportMemoryUsage()) { | 137 | if (device.CanReportMemoryUsage()) { |
| @@ -206,14 +205,8 @@ void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t siz | |||
| 206 | } | 205 | } |
| 207 | 206 | ||
| 208 | void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { | 207 | void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { |
| 209 | if (has_unified_vertex_buffers) { | 208 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); |
| 210 | buffer.MakeResident(GL_READ_ONLY); | 209 | index_buffer_offset = offset; |
| 211 | glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, | ||
| 212 | static_cast<GLsizeiptr>(Common::AlignUp(size, 4))); | ||
| 213 | } else { | ||
| 214 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); | ||
| 215 | index_buffer_offset = offset; | ||
| 216 | } | ||
| 217 | } | 210 | } |
| 218 | 211 | ||
| 219 | void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, | 212 | void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, |
| @@ -221,24 +214,23 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, | |||
| 221 | if (index >= max_attributes) { | 214 | if (index >= max_attributes) { |
| 222 | return; | 215 | return; |
| 223 | } | 216 | } |
| 224 | if (has_unified_vertex_buffers) { | 217 | glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), |
| 225 | buffer.MakeResident(GL_READ_ONLY); | 218 | static_cast<GLsizei>(stride)); |
| 226 | glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride)); | ||
| 227 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index, | ||
| 228 | buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size)); | ||
| 229 | } else { | ||
| 230 | glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), | ||
| 231 | static_cast<GLsizei>(stride)); | ||
| 232 | } | ||
| 233 | } | 219 | } |
| 234 | 220 | ||
| 235 | void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) { | 221 | void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) { |
| 236 | for (u32 index = 0; index < bindings.buffers.size(); ++index) { | 222 | // TODO: Should HostBindings provide the correct runtime types to avoid these transforms? |
| 237 | BindVertexBuffer(bindings.min_index + index, *bindings.buffers[index], | 223 | std::array<GLuint, 32> buffer_handles; |
| 238 | static_cast<u32>(bindings.offsets[index]), | 224 | std::array<GLsizei, 32> buffer_strides; |
| 239 | static_cast<u32>(bindings.sizes[index]), | 225 | std::ranges::transform(bindings.buffers, buffer_handles.begin(), |
| 240 | static_cast<u32>(bindings.strides[index])); | 226 | [](const Buffer* const buffer) { return buffer->Handle(); }); |
| 241 | } | 227 | std::ranges::transform(bindings.strides, buffer_strides.begin(), |
| 228 | [](u64 stride) { return static_cast<GLsizei>(stride); }); | ||
| 229 | const u32 count = | ||
| 230 | std::min(static_cast<u32>(bindings.buffers.size()), max_attributes - bindings.min_index); | ||
| 231 | glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(), | ||
| 232 | reinterpret_cast<const GLintptr*>(bindings.offsets.data()), | ||
| 233 | buffer_strides.data()); | ||
| 242 | } | 234 | } |
| 243 | 235 | ||
| 244 | void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, | 236 | void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, |
| @@ -330,11 +322,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, | |||
| 330 | } | 322 | } |
| 331 | 323 | ||
| 332 | void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings) { | 324 | void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings) { |
| 333 | for (u32 index = 0; index < bindings.buffers.size(); ++index) { | 325 | std::array<GLuint, 4> buffer_handles; |
| 334 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, bindings.buffers[index]->Handle(), | 326 | std::ranges::transform(bindings.buffers, buffer_handles.begin(), |
| 335 | static_cast<GLintptr>(bindings.offsets[index]), | 327 | [](const Buffer* const buffer) { return buffer->Handle(); }); |
| 336 | static_cast<GLsizeiptr>(bindings.sizes[index])); | 328 | glBindBuffersRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, |
| 337 | } | 329 | static_cast<GLsizei>(bindings.buffers.size()), buffer_handles.data(), |
| 330 | reinterpret_cast<const GLintptr*>(bindings.offsets.data()), | ||
| 331 | reinterpret_cast<const GLsizeiptr*>(bindings.strides.data())); | ||
| 338 | } | 332 | } |
| 339 | 333 | ||
| 340 | void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, | 334 | void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 41b746f3b..8613037eb 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -194,7 +194,6 @@ private: | |||
| 194 | 194 | ||
| 195 | bool has_fast_buffer_sub_data = false; | 195 | bool has_fast_buffer_sub_data = false; |
| 196 | bool use_assembly_shaders = false; | 196 | bool use_assembly_shaders = false; |
| 197 | bool has_unified_vertex_buffers = false; | ||
| 198 | 197 | ||
| 199 | bool use_storage_buffers = false; | 198 | bool use_storage_buffers = false; |
| 200 | 199 | ||
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 94258ccd0..46d88c664 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -200,7 +200,6 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { | |||
| 200 | has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); | 200 | has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); |
| 201 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | 201 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; |
| 202 | has_derivative_control = GLAD_GL_ARB_derivative_control; | 202 | has_derivative_control = GLAD_GL_ARB_derivative_control; |
| 203 | has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; | ||
| 204 | has_debugging_tool_attached = IsDebugToolAttached(extensions); | 203 | has_debugging_tool_attached = IsDebugToolAttached(extensions); |
| 205 | has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); | 204 | has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); |
| 206 | has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; | 205 | has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index a5a6bbbba..96034ea4a 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -72,10 +72,6 @@ public: | |||
| 72 | return has_texture_shadow_lod; | 72 | return has_texture_shadow_lod; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | bool HasVertexBufferUnifiedMemory() const { | ||
| 76 | return has_vertex_buffer_unified_memory; | ||
| 77 | } | ||
| 78 | |||
| 79 | bool HasASTC() const { | 75 | bool HasASTC() const { |
| 80 | return has_astc; | 76 | return has_astc; |
| 81 | } | 77 | } |
| @@ -215,7 +211,6 @@ private: | |||
| 215 | bool has_vertex_viewport_layer{}; | 211 | bool has_vertex_viewport_layer{}; |
| 216 | bool has_image_load_formatted{}; | 212 | bool has_image_load_formatted{}; |
| 217 | bool has_texture_shadow_lod{}; | 213 | bool has_texture_shadow_lod{}; |
| 218 | bool has_vertex_buffer_unified_memory{}; | ||
| 219 | bool has_astc{}; | 214 | bool has_astc{}; |
| 220 | bool has_variable_aoffi{}; | 215 | bool has_variable_aoffi{}; |
| 221 | bool has_component_indexing_bug{}; | 216 | bool has_component_indexing_bug{}; |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6b8d4e554..6bfed08a1 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -168,15 +168,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | |||
| 168 | if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { | 168 | if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { |
| 169 | glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); | 169 | glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); |
| 170 | } | 170 | } |
| 171 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it | ||
| 172 | if (device.HasVertexBufferUnifiedMemory()) { | ||
| 173 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | ||
| 174 | glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); | ||
| 175 | |||
| 176 | glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); | ||
| 177 | glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, | ||
| 178 | &vertex_buffer_address); | ||
| 179 | } | ||
| 180 | } | 171 | } |
| 181 | 172 | ||
| 182 | RendererOpenGL::~RendererOpenGL() = default; | 173 | RendererOpenGL::~RendererOpenGL() = default; |
| @@ -680,13 +671,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 680 | offsetof(ScreenRectVertex, tex_coord)); | 671 | offsetof(ScreenRectVertex, tex_coord)); |
| 681 | glVertexAttribBinding(PositionLocation, 0); | 672 | glVertexAttribBinding(PositionLocation, 0); |
| 682 | glVertexAttribBinding(TexCoordLocation, 0); | 673 | glVertexAttribBinding(TexCoordLocation, 0); |
| 683 | if (device.HasVertexBufferUnifiedMemory()) { | 674 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); |
| 684 | glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); | ||
| 685 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, | ||
| 686 | sizeof(vertices)); | ||
| 687 | } else { | ||
| 688 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); | ||
| 689 | } | ||
| 690 | 675 | ||
| 691 | if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { | 676 | if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { |
| 692 | glBindSampler(0, present_sampler.handle); | 677 | glBindSampler(0, present_sampler.handle); |