diff options
| author | 2019-11-06 10:08:55 -0500 | |
|---|---|---|
| committer | 2019-11-06 10:08:55 -0500 | |
| commit | 468576284d8e102f84f456a7d4ab3701c3e0280a (patch) | |
| tree | d0530d0795ca205447fabe757c4e8f057daf16c9 | |
| parent | Merge pull request #3076 from DarkLordZach/telem-names (diff) | |
| parent | gl_rasterizer: Re-enable stream buffer memory due to global memory (diff) | |
| download | yuzu-468576284d8e102f84f456a7d4ab3701c3e0280a.tar.gz yuzu-468576284d8e102f84f456a7d4ab3701c3e0280a.tar.xz yuzu-468576284d8e102f84f456a7d4ab3701c3e0280a.zip | |
Merge pull request #3057 from ReinUsesLisp/buffer-sub-data
gl_rasterizer: Upload constant buffers with glNamedBufferSubData
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 14 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 20 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 7 |
6 files changed, 70 insertions, 11 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2442ddfd6..63b3a8205 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -30,7 +30,7 @@ public: | |||
| 30 | using BufferInfo = std::pair<const TBufferType*, u64>; | 30 | using BufferInfo = std::pair<const TBufferType*, u64>; |
| 31 | 31 | ||
| 32 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | 32 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, |
| 33 | bool is_written = false) { | 33 | bool is_written = false, bool use_fast_cbuf = false) { |
| 34 | std::lock_guard lock{mutex}; | 34 | std::lock_guard lock{mutex}; |
| 35 | 35 | ||
| 36 | auto& memory_manager = system.GPU().MemoryManager(); | 36 | auto& memory_manager = system.GPU().MemoryManager(); |
| @@ -43,9 +43,13 @@ public: | |||
| 43 | // Cache management is a big overhead, so only cache entries with a given size. | 43 | // Cache management is a big overhead, so only cache entries with a given size. |
| 44 | // TODO: Figure out which size is the best for given games. | 44 | // TODO: Figure out which size is the best for given games. |
| 45 | constexpr std::size_t max_stream_size = 0x800; | 45 | constexpr std::size_t max_stream_size = 0x800; |
| 46 | if (size < max_stream_size) { | 46 | if (use_fast_cbuf || size < max_stream_size) { |
| 47 | if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { | 47 | if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { |
| 48 | return StreamBufferUpload(host_ptr, size, alignment); | 48 | if (use_fast_cbuf) { |
| 49 | return ConstBufferUpload(host_ptr, size); | ||
| 50 | } else { | ||
| 51 | return StreamBufferUpload(host_ptr, size, alignment); | ||
| 52 | } | ||
| 49 | } | 53 | } |
| 50 | } | 54 | } |
| 51 | 55 | ||
| @@ -152,6 +156,10 @@ protected: | |||
| 152 | virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, | 156 | virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, |
| 153 | std::size_t dst_offset, std::size_t size) = 0; | 157 | std::size_t dst_offset, std::size_t size) = 0; |
| 154 | 158 | ||
| 159 | virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { | ||
| 160 | return {}; | ||
| 161 | } | ||
| 162 | |||
| 155 | /// Register an object into the cache | 163 | /// Register an object into the cache |
| 156 | void Register(const MapInterval& new_map, bool inherit_written = false) { | 164 | void Register(const MapInterval& new_map, bool inherit_written = false) { |
| 157 | const CacheAddr cache_ptr = new_map->GetStart(); | 165 | const CacheAddr cache_ptr = new_map->GetStart(); |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index f8a807c84..0375fca17 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -8,13 +8,17 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/rasterizer_interface.h" | 12 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 13 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 14 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 15 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 15 | 17 | ||
| 16 | namespace OpenGL { | 18 | namespace OpenGL { |
| 17 | 19 | ||
| 20 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 21 | |||
| 18 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | 22 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); |
| 19 | 23 | ||
| 20 | CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) | 24 | CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) |
| @@ -26,11 +30,22 @@ CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t siz | |||
| 26 | CachedBufferBlock::~CachedBufferBlock() = default; | 30 | CachedBufferBlock::~CachedBufferBlock() = default; |
| 27 | 31 | ||
| 28 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, | 32 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 29 | std::size_t stream_size) | 33 | const Device& device, std::size_t stream_size) |
| 30 | : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{ | 34 | : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { |
| 31 | rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} | 35 | if (!device.HasFastBufferSubData()) { |
| 36 | return; | ||
| 37 | } | ||
| 38 | |||
| 39 | static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); | ||
| 40 | glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | ||
| 41 | for (const GLuint cbuf : cbufs) { | ||
| 42 | glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); | ||
| 43 | } | ||
| 44 | } | ||
| 32 | 45 | ||
| 33 | OGLBufferCache::~OGLBufferCache() = default; | 46 | OGLBufferCache::~OGLBufferCache() { |
| 47 | glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | ||
| 48 | } | ||
| 34 | 49 | ||
| 35 | Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { | 50 | Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { |
| 36 | return std::make_shared<CachedBufferBlock>(cache_addr, size); | 51 | return std::make_shared<CachedBufferBlock>(cache_addr, size); |
| @@ -69,4 +84,12 @@ void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t | |||
| 69 | static_cast<GLsizeiptr>(size)); | 84 | static_cast<GLsizeiptr>(size)); |
| 70 | } | 85 | } |
| 71 | 86 | ||
| 87 | OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, | ||
| 88 | std::size_t size) { | ||
| 89 | DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); | ||
| 90 | const GLuint& cbuf = cbufs[cbuf_cursor++]; | ||
| 91 | glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); | ||
| 92 | return {&cbuf, 0}; | ||
| 93 | } | ||
| 94 | |||
| 72 | } // namespace OpenGL | 95 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 022e7bfa9..8c7145443 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -4,10 +4,12 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 11 | #include "video_core/buffer_cache/buffer_cache.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/rasterizer_cache.h" | 13 | #include "video_core/rasterizer_cache.h" |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 15 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| @@ -18,12 +20,14 @@ class System; | |||
| 18 | 20 | ||
| 19 | namespace OpenGL { | 21 | namespace OpenGL { |
| 20 | 22 | ||
| 23 | class Device; | ||
| 21 | class OGLStreamBuffer; | 24 | class OGLStreamBuffer; |
| 22 | class RasterizerOpenGL; | 25 | class RasterizerOpenGL; |
| 23 | 26 | ||
| 24 | class CachedBufferBlock; | 27 | class CachedBufferBlock; |
| 25 | 28 | ||
| 26 | using Buffer = std::shared_ptr<CachedBufferBlock>; | 29 | using Buffer = std::shared_ptr<CachedBufferBlock>; |
| 30 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | ||
| 27 | 31 | ||
| 28 | class CachedBufferBlock : public VideoCommon::BufferBlock { | 32 | class CachedBufferBlock : public VideoCommon::BufferBlock { |
| 29 | public: | 33 | public: |
| @@ -38,14 +42,18 @@ private: | |||
| 38 | OGLBuffer gl_buffer{}; | 42 | OGLBuffer gl_buffer{}; |
| 39 | }; | 43 | }; |
| 40 | 44 | ||
| 41 | class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> { | 45 | class OGLBufferCache final : public GenericBufferCache { |
| 42 | public: | 46 | public: |
| 43 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, | 47 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 44 | std::size_t stream_size); | 48 | const Device& device, std::size_t stream_size); |
| 45 | ~OGLBufferCache(); | 49 | ~OGLBufferCache(); |
| 46 | 50 | ||
| 47 | const GLuint* GetEmptyBuffer(std::size_t) override; | 51 | const GLuint* GetEmptyBuffer(std::size_t) override; |
| 48 | 52 | ||
| 53 | void Acquire() noexcept { | ||
| 54 | cbuf_cursor = 0; | ||
| 55 | } | ||
| 56 | |||
| 49 | protected: | 57 | protected: |
| 50 | Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; | 58 | Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; |
| 51 | 59 | ||
| @@ -61,6 +69,14 @@ protected: | |||
| 61 | 69 | ||
| 62 | void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | 70 | void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 63 | std::size_t dst_offset, std::size_t size) override; | 71 | std::size_t dst_offset, std::size_t size) override; |
| 72 | |||
| 73 | BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; | ||
| 74 | |||
| 75 | private: | ||
| 76 | std::size_t cbuf_cursor = 0; | ||
| 77 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | ||
| 78 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram> | ||
| 79 | cbufs; | ||
| 64 | }; | 80 | }; |
| 65 | 81 | ||
| 66 | } // namespace OpenGL | 82 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 64de7e425..c65b24c69 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -51,8 +51,11 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view | |||
| 51 | } // Anonymous namespace | 51 | } // Anonymous namespace |
| 52 | 52 | ||
| 53 | Device::Device() { | 53 | Device::Device() { |
| 54 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); | ||
| 54 | const std::vector extensions = GetExtensions(); | 55 | const std::vector extensions = GetExtensions(); |
| 55 | 56 | ||
| 57 | const bool is_nvidia = vendor == "NVIDIA Corporation"; | ||
| 58 | |||
| 56 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 59 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 57 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 60 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 58 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 61 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| @@ -64,6 +67,7 @@ Device::Device() { | |||
| 64 | has_variable_aoffi = TestVariableAoffi(); | 67 | has_variable_aoffi = TestVariableAoffi(); |
| 65 | has_component_indexing_bug = TestComponentIndexingBug(); | 68 | has_component_indexing_bug = TestComponentIndexingBug(); |
| 66 | has_precise_bug = TestPreciseBug(); | 69 | has_precise_bug = TestPreciseBug(); |
| 70 | has_fast_buffer_sub_data = is_nvidia; | ||
| 67 | 71 | ||
| 68 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); | 72 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); |
| 69 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); | 73 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index bb273c3d6..bf35bd0b6 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -54,6 +54,10 @@ public: | |||
| 54 | return has_precise_bug; | 54 | return has_precise_bug; |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | bool HasFastBufferSubData() const { | ||
| 58 | return has_fast_buffer_sub_data; | ||
| 59 | } | ||
| 60 | |||
| 57 | private: | 61 | private: |
| 58 | static bool TestVariableAoffi(); | 62 | static bool TestVariableAoffi(); |
| 59 | static bool TestComponentIndexingBug(); | 63 | static bool TestComponentIndexingBug(); |
| @@ -69,6 +73,7 @@ private: | |||
| 69 | bool has_variable_aoffi{}; | 73 | bool has_variable_aoffi{}; |
| 70 | bool has_component_indexing_bug{}; | 74 | bool has_component_indexing_bug{}; |
| 71 | bool has_precise_bug{}; | 75 | bool has_precise_bug{}; |
| 76 | bool has_fast_buffer_sub_data{}; | ||
| 72 | }; | 77 | }; |
| 73 | 78 | ||
| 74 | } // namespace OpenGL | 79 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d1e147db8..e560d70d5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -67,7 +67,7 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf | |||
| 67 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 67 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 68 | ScreenInfo& info) | 68 | ScreenInfo& info) |
| 69 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, | 69 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, |
| 70 | system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} { | 70 | system{system}, screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { |
| 71 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 71 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| 72 | state.draw.shader_program = 0; | 72 | state.draw.shader_program = 0; |
| 73 | state.Apply(); | 73 | state.Apply(); |
| @@ -558,6 +558,8 @@ void RasterizerOpenGL::DrawPrelude() { | |||
| 558 | SyncPolygonOffset(); | 558 | SyncPolygonOffset(); |
| 559 | SyncAlphaTest(); | 559 | SyncAlphaTest(); |
| 560 | 560 | ||
| 561 | buffer_cache.Acquire(); | ||
| 562 | |||
| 561 | // Draw the vertex batch | 563 | // Draw the vertex batch |
| 562 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; | 564 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; |
| 563 | 565 | ||
| @@ -879,7 +881,8 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | |||
| 879 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); | 881 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); |
| 880 | 882 | ||
| 881 | const auto alignment = device.GetUniformBufferAlignment(); | 883 | const auto alignment = device.GetUniformBufferAlignment(); |
| 882 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); | 884 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, |
| 885 | device.HasFastBufferSubData()); | ||
| 883 | bind_ubo_pushbuffer.Push(cbuf, offset, size); | 886 | bind_ubo_pushbuffer.Push(cbuf, offset, size); |
| 884 | } | 887 | } |
| 885 | 888 | ||