diff options
| author | 2023-12-03 16:27:22 -0500 | |
|---|---|---|
| committer | 2023-12-03 16:27:22 -0500 | |
| commit | 5fb1a83e4cbb43f5bb2e07d003e544dca580e0de (patch) | |
| tree | 0f6cce2106f02552c4fe3ce085108c4c4eb0f5af /src | |
| parent | Merge pull request #12196 from ameerj/glsl-cbuf-sizes (diff) | |
| parent | gl_buffer_cache: Batch vertex/tfb buffer binding (diff) | |
| download | yuzu-5fb1a83e4cbb43f5bb2e07d003e544dca580e0de.tar.gz yuzu-5fb1a83e4cbb43f5bb2e07d003e544dca580e0de.tar.xz yuzu-5fb1a83e4cbb43f5bb2e07d003e544dca580e0de.zip | |
Merge pull request #12094 from ameerj/gl-buffer-cache-batch-vtx
gl_buffer_cache: Batch vertex/tfb buffer binding
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 76 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 17 |
5 files changed, 36 insertions, 64 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index ed188b435..e6c70fb34 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -53,13 +53,11 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast | |||
| 53 | VAddr cpu_addr_, u64 size_bytes_) | 53 | VAddr cpu_addr_, u64 size_bytes_) |
| 54 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | 54 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { |
| 55 | buffer.Create(); | 55 | buffer.Create(); |
| 56 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); | 56 | if (runtime.device.HasDebuggingToolAttached()) { |
| 57 | glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); | 57 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); |
| 58 | glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); | 58 | glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); |
| 59 | |||
| 60 | if (runtime.has_unified_vertex_buffers) { | ||
| 61 | glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); | ||
| 62 | } | 59 | } |
| 60 | glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); | ||
| 63 | } | 61 | } |
| 64 | 62 | ||
| 65 | void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept { | 63 | void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept { |
| @@ -111,7 +109,6 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, | |||
| 111 | : device{device_}, staging_buffer_pool{staging_buffer_pool_}, | 109 | : device{device_}, staging_buffer_pool{staging_buffer_pool_}, |
| 112 | has_fast_buffer_sub_data{device.HasFastBufferSubData()}, | 110 | has_fast_buffer_sub_data{device.HasFastBufferSubData()}, |
| 113 | use_assembly_shaders{device.UseAssemblyShaders()}, | 111 | use_assembly_shaders{device.UseAssemblyShaders()}, |
| 114 | has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, | ||
| 115 | stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { | 112 | stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { |
| 116 | GLint gl_max_attributes; | 113 | GLint gl_max_attributes; |
| 117 | glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); | 114 | glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); |
| @@ -123,16 +120,18 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, | |||
| 123 | GL_STREAM_DRAW); | 120 | GL_STREAM_DRAW); |
| 124 | } | 121 | } |
| 125 | } | 122 | } |
| 126 | for (auto& stage_uniforms : copy_uniforms) { | 123 | if (use_assembly_shaders) { |
| 127 | for (OGLBuffer& buffer : stage_uniforms) { | 124 | for (auto& stage_uniforms : copy_uniforms) { |
| 125 | for (OGLBuffer& buffer : stage_uniforms) { | ||
| 126 | buffer.Create(); | ||
| 127 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||
| 128 | } | ||
| 129 | } | ||
| 130 | for (OGLBuffer& buffer : copy_compute_uniforms) { | ||
| 128 | buffer.Create(); | 131 | buffer.Create(); |
| 129 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | 132 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); |
| 130 | } | 133 | } |
| 131 | } | 134 | } |
| 132 | for (OGLBuffer& buffer : copy_compute_uniforms) { | ||
| 133 | buffer.Create(); | ||
| 134 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||
| 135 | } | ||
| 136 | 135 | ||
| 137 | device_access_memory = [this]() -> u64 { | 136 | device_access_memory = [this]() -> u64 { |
| 138 | if (device.CanReportMemoryUsage()) { | 137 | if (device.CanReportMemoryUsage()) { |
| @@ -211,14 +210,8 @@ void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t siz | |||
| 211 | } | 210 | } |
| 212 | 211 | ||
| 213 | void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { | 212 | void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { |
| 214 | if (has_unified_vertex_buffers) { | 213 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); |
| 215 | buffer.MakeResident(GL_READ_ONLY); | 214 | index_buffer_offset = offset; |
| 216 | glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, | ||
| 217 | static_cast<GLsizeiptr>(Common::AlignUp(size, 4))); | ||
| 218 | } else { | ||
| 219 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); | ||
| 220 | index_buffer_offset = offset; | ||
| 221 | } | ||
| 222 | } | 215 | } |
| 223 | 216 | ||
| 224 | void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, | 217 | void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, |
| @@ -226,24 +219,23 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, | |||
| 226 | if (index >= max_attributes) { | 219 | if (index >= max_attributes) { |
| 227 | return; | 220 | return; |
| 228 | } | 221 | } |
| 229 | if (has_unified_vertex_buffers) { | 222 | glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), |
| 230 | buffer.MakeResident(GL_READ_ONLY); | 223 | static_cast<GLsizei>(stride)); |
| 231 | glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride)); | ||
| 232 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index, | ||
| 233 | buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size)); | ||
| 234 | } else { | ||
| 235 | glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), | ||
| 236 | static_cast<GLsizei>(stride)); | ||
| 237 | } | ||
| 238 | } | 224 | } |
| 239 | 225 | ||
| 240 | void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) { | 226 | void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) { |
| 241 | for (u32 index = 0; index < bindings.buffers.size(); ++index) { | 227 | // TODO: Should HostBindings provide the correct runtime types to avoid these transforms? |
| 242 | BindVertexBuffer(bindings.min_index + index, *bindings.buffers[index], | 228 | std::array<GLuint, 32> buffer_handles; |
| 243 | static_cast<u32>(bindings.offsets[index]), | 229 | std::array<GLsizei, 32> buffer_strides; |
| 244 | static_cast<u32>(bindings.sizes[index]), | 230 | std::ranges::transform(bindings.buffers, buffer_handles.begin(), |
| 245 | static_cast<u32>(bindings.strides[index])); | 231 | [](const Buffer* const buffer) { return buffer->Handle(); }); |
| 246 | } | 232 | std::ranges::transform(bindings.strides, buffer_strides.begin(), |
| 233 | [](u64 stride) { return static_cast<GLsizei>(stride); }); | ||
| 234 | const u32 count = | ||
| 235 | std::min(static_cast<u32>(bindings.buffers.size()), max_attributes - bindings.min_index); | ||
| 236 | glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(), | ||
| 237 | reinterpret_cast<const GLintptr*>(bindings.offsets.data()), | ||
| 238 | buffer_strides.data()); | ||
| 247 | } | 239 | } |
| 248 | 240 | ||
| 249 | void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, | 241 | void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, |
| @@ -335,11 +327,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, | |||
| 335 | } | 327 | } |
| 336 | 328 | ||
| 337 | void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings) { | 329 | void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings) { |
| 338 | for (u32 index = 0; index < bindings.buffers.size(); ++index) { | 330 | std::array<GLuint, 4> buffer_handles; |
| 339 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, bindings.buffers[index]->Handle(), | 331 | std::ranges::transform(bindings.buffers, buffer_handles.begin(), |
| 340 | static_cast<GLintptr>(bindings.offsets[index]), | 332 | [](const Buffer* const buffer) { return buffer->Handle(); }); |
| 341 | static_cast<GLsizeiptr>(bindings.sizes[index])); | 333 | glBindBuffersRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, |
| 342 | } | 334 | static_cast<GLsizei>(bindings.buffers.size()), buffer_handles.data(), |
| 335 | reinterpret_cast<const GLintptr*>(bindings.offsets.data()), | ||
| 336 | reinterpret_cast<const GLsizeiptr*>(bindings.strides.data())); | ||
| 343 | } | 337 | } |
| 344 | 338 | ||
| 345 | void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, | 339 | void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 1e8708f59..71cd45d35 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -209,7 +209,6 @@ private: | |||
| 209 | 209 | ||
| 210 | bool has_fast_buffer_sub_data = false; | 210 | bool has_fast_buffer_sub_data = false; |
| 211 | bool use_assembly_shaders = false; | 211 | bool use_assembly_shaders = false; |
| 212 | bool has_unified_vertex_buffers = false; | ||
| 213 | 212 | ||
| 214 | bool use_storage_buffers = false; | 213 | bool use_storage_buffers = false; |
| 215 | 214 | ||
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 993438a27..a6c93068f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -200,7 +200,6 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { | |||
| 200 | has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); | 200 | has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); |
| 201 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | 201 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; |
| 202 | has_derivative_control = GLAD_GL_ARB_derivative_control; | 202 | has_derivative_control = GLAD_GL_ARB_derivative_control; |
| 203 | has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; | ||
| 204 | has_debugging_tool_attached = IsDebugToolAttached(extensions); | 203 | has_debugging_tool_attached = IsDebugToolAttached(extensions); |
| 205 | has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); | 204 | has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); |
| 206 | has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; | 205 | has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index a5a6bbbba..96034ea4a 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -72,10 +72,6 @@ public: | |||
| 72 | return has_texture_shadow_lod; | 72 | return has_texture_shadow_lod; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | bool HasVertexBufferUnifiedMemory() const { | ||
| 76 | return has_vertex_buffer_unified_memory; | ||
| 77 | } | ||
| 78 | |||
| 79 | bool HasASTC() const { | 75 | bool HasASTC() const { |
| 80 | return has_astc; | 76 | return has_astc; |
| 81 | } | 77 | } |
| @@ -215,7 +211,6 @@ private: | |||
| 215 | bool has_vertex_viewport_layer{}; | 211 | bool has_vertex_viewport_layer{}; |
| 216 | bool has_image_load_formatted{}; | 212 | bool has_image_load_formatted{}; |
| 217 | bool has_texture_shadow_lod{}; | 213 | bool has_texture_shadow_lod{}; |
| 218 | bool has_vertex_buffer_unified_memory{}; | ||
| 219 | bool has_astc{}; | 214 | bool has_astc{}; |
| 220 | bool has_variable_aoffi{}; | 215 | bool has_variable_aoffi{}; |
| 221 | bool has_component_indexing_bug{}; | 216 | bool has_component_indexing_bug{}; |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6b8d4e554..6bfed08a1 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -168,15 +168,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | |||
| 168 | if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { | 168 | if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { |
| 169 | glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); | 169 | glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); |
| 170 | } | 170 | } |
| 171 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it | ||
| 172 | if (device.HasVertexBufferUnifiedMemory()) { | ||
| 173 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | ||
| 174 | glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); | ||
| 175 | |||
| 176 | glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); | ||
| 177 | glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, | ||
| 178 | &vertex_buffer_address); | ||
| 179 | } | ||
| 180 | } | 171 | } |
| 181 | 172 | ||
| 182 | RendererOpenGL::~RendererOpenGL() = default; | 173 | RendererOpenGL::~RendererOpenGL() = default; |
| @@ -680,13 +671,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 680 | offsetof(ScreenRectVertex, tex_coord)); | 671 | offsetof(ScreenRectVertex, tex_coord)); |
| 681 | glVertexAttribBinding(PositionLocation, 0); | 672 | glVertexAttribBinding(PositionLocation, 0); |
| 682 | glVertexAttribBinding(TexCoordLocation, 0); | 673 | glVertexAttribBinding(TexCoordLocation, 0); |
| 683 | if (device.HasVertexBufferUnifiedMemory()) { | 674 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); |
| 684 | glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); | ||
| 685 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, | ||
| 686 | sizeof(vertices)); | ||
| 687 | } else { | ||
| 688 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); | ||
| 689 | } | ||
| 690 | 675 | ||
| 691 | if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { | 676 | if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { |
| 692 | glBindSampler(0, present_sampler.handle); | 677 | glBindSampler(0, present_sampler.handle); |