summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Gravatar bunnei, 2019-11-06 10:08:55 -0500
committer: Gravatar GitHub, 2019-11-06 10:08:55 -0500
commit: 468576284d8e102f84f456a7d4ab3701c3e0280a (patch)
tree: d0530d0795ca205447fabe757c4e8f057daf16c9
parent: Merge pull request #3076 from DarkLordZach/telem-names (diff)
parent: gl_rasterizer: Re-enable stream buffer memory due to global memory (diff)
download: yuzu-468576284d8e102f84f456a7d4ab3701c3e0280a.tar.gz
yuzu-468576284d8e102f84f456a7d4ab3701c3e0280a.tar.xz
yuzu-468576284d8e102f84f456a7d4ab3701c3e0280a.zip
Merge pull request #3057 from ReinUsesLisp/buffer-sub-data
gl_rasterizer: Upload constant buffers with glNamedBufferSubData
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 14
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 31
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h 20
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp 4
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h 5
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 7
6 files changed, 70 insertions, 11 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2442ddfd6..63b3a8205 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -30,7 +30,7 @@ public:
30 using BufferInfo = std::pair<const TBufferType*, u64>; 30 using BufferInfo = std::pair<const TBufferType*, u64>;
31 31
32 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, 32 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
33 bool is_written = false) { 33 bool is_written = false, bool use_fast_cbuf = false) {
34 std::lock_guard lock{mutex}; 34 std::lock_guard lock{mutex};
35 35
36 auto& memory_manager = system.GPU().MemoryManager(); 36 auto& memory_manager = system.GPU().MemoryManager();
@@ -43,9 +43,13 @@ public:
43 // Cache management is a big overhead, so only cache entries with a given size. 43 // Cache management is a big overhead, so only cache entries with a given size.
44 // TODO: Figure out which size is the best for given games. 44 // TODO: Figure out which size is the best for given games.
45 constexpr std::size_t max_stream_size = 0x800; 45 constexpr std::size_t max_stream_size = 0x800;
46 if (size < max_stream_size) { 46 if (use_fast_cbuf || size < max_stream_size) {
47 if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { 47 if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
48 return StreamBufferUpload(host_ptr, size, alignment); 48 if (use_fast_cbuf) {
49 return ConstBufferUpload(host_ptr, size);
50 } else {
51 return StreamBufferUpload(host_ptr, size, alignment);
52 }
49 } 53 }
50 } 54 }
51 55
@@ -152,6 +156,10 @@ protected:
152 virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, 156 virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
153 std::size_t dst_offset, std::size_t size) = 0; 157 std::size_t dst_offset, std::size_t size) = 0;
154 158
159 virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
160 return {};
161 }
162
155 /// Register an object into the cache 163 /// Register an object into the cache
156 void Register(const MapInterval& new_map, bool inherit_written = false) { 164 void Register(const MapInterval& new_map, bool inherit_written = false) {
157 const CacheAddr cache_ptr = new_map->GetStart(); 165 const CacheAddr cache_ptr = new_map->GetStart();
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index f8a807c84..0375fca17 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -8,13 +8,17 @@
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/microprofile.h" 10#include "common/microprofile.h"
11#include "video_core/engines/maxwell_3d.h"
11#include "video_core/rasterizer_interface.h" 12#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_opengl/gl_buffer_cache.h" 13#include "video_core/renderer_opengl/gl_buffer_cache.h"
14#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_rasterizer.h" 15#include "video_core/renderer_opengl/gl_rasterizer.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h" 16#include "video_core/renderer_opengl/gl_resource_manager.h"
15 17
16namespace OpenGL { 18namespace OpenGL {
17 19
20using Maxwell = Tegra::Engines::Maxwell3D::Regs;
21
18MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); 22MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
19 23
20CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) 24CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
@@ -26,11 +30,22 @@ CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t siz
26CachedBufferBlock::~CachedBufferBlock() = default; 30CachedBufferBlock::~CachedBufferBlock() = default;
27 31
28OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 32OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
29 std::size_t stream_size) 33 const Device& device, std::size_t stream_size)
30 : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{ 34 : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
31 rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} 35 if (!device.HasFastBufferSubData()) {
36 return;
37 }
38
39 static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
40 glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
41 for (const GLuint cbuf : cbufs) {
42 glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
43 }
44}
32 45
33OGLBufferCache::~OGLBufferCache() = default; 46OGLBufferCache::~OGLBufferCache() {
47 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
48}
34 49
35Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { 50Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
36 return std::make_shared<CachedBufferBlock>(cache_addr, size); 51 return std::make_shared<CachedBufferBlock>(cache_addr, size);
@@ -69,4 +84,12 @@ void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
69 static_cast<GLsizeiptr>(size)); 84 static_cast<GLsizeiptr>(size));
70} 85}
71 86
87OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
88 std::size_t size) {
89 DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
90 const GLuint& cbuf = cbufs[cbuf_cursor++];
91 glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
92 return {&cbuf, 0};
93}
94
72} // namespace OpenGL 95} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 022e7bfa9..8c7145443 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -4,10 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <memory> 8#include <memory>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h" 11#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/engines/maxwell_3d.h"
11#include "video_core/rasterizer_cache.h" 13#include "video_core/rasterizer_cache.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_stream_buffer.h" 15#include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -18,12 +20,14 @@ class System;
18 20
19namespace OpenGL { 21namespace OpenGL {
20 22
23class Device;
21class OGLStreamBuffer; 24class OGLStreamBuffer;
22class RasterizerOpenGL; 25class RasterizerOpenGL;
23 26
24class CachedBufferBlock; 27class CachedBufferBlock;
25 28
26using Buffer = std::shared_ptr<CachedBufferBlock>; 29using Buffer = std::shared_ptr<CachedBufferBlock>;
30using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
27 31
28class CachedBufferBlock : public VideoCommon::BufferBlock { 32class CachedBufferBlock : public VideoCommon::BufferBlock {
29public: 33public:
@@ -38,14 +42,18 @@ private:
38 OGLBuffer gl_buffer{}; 42 OGLBuffer gl_buffer{};
39}; 43};
40 44
41class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> { 45class OGLBufferCache final : public GenericBufferCache {
42public: 46public:
43 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 47 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
44 std::size_t stream_size); 48 const Device& device, std::size_t stream_size);
45 ~OGLBufferCache(); 49 ~OGLBufferCache();
46 50
47 const GLuint* GetEmptyBuffer(std::size_t) override; 51 const GLuint* GetEmptyBuffer(std::size_t) override;
48 52
53 void Acquire() noexcept {
54 cbuf_cursor = 0;
55 }
56
49protected: 57protected:
50 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; 58 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
51 59
@@ -61,6 +69,14 @@ protected:
61 69
62 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, 70 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
63 std::size_t dst_offset, std::size_t size) override; 71 std::size_t dst_offset, std::size_t size) override;
72
73 BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
74
75private:
76 std::size_t cbuf_cursor = 0;
77 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
78 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
79 cbufs;
64}; 80};
65 81
66} // namespace OpenGL 82} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 64de7e425..c65b24c69 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -51,8 +51,11 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
51} // Anonymous namespace 51} // Anonymous namespace
52 52
53Device::Device() { 53Device::Device() {
54 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
54 const std::vector extensions = GetExtensions(); 55 const std::vector extensions = GetExtensions();
55 56
57 const bool is_nvidia = vendor == "NVIDIA Corporation";
58
56 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 59 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
57 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 60 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
58 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 61 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
@@ -64,6 +67,7 @@ Device::Device() {
64 has_variable_aoffi = TestVariableAoffi(); 67 has_variable_aoffi = TestVariableAoffi();
65 has_component_indexing_bug = TestComponentIndexingBug(); 68 has_component_indexing_bug = TestComponentIndexingBug();
66 has_precise_bug = TestPreciseBug(); 69 has_precise_bug = TestPreciseBug();
70 has_fast_buffer_sub_data = is_nvidia;
67 71
68 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); 72 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
69 LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); 73 LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index bb273c3d6..bf35bd0b6 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -54,6 +54,10 @@ public:
54 return has_precise_bug; 54 return has_precise_bug;
55 } 55 }
56 56
57 bool HasFastBufferSubData() const {
58 return has_fast_buffer_sub_data;
59 }
60
57private: 61private:
58 static bool TestVariableAoffi(); 62 static bool TestVariableAoffi();
59 static bool TestComponentIndexingBug(); 63 static bool TestComponentIndexingBug();
@@ -69,6 +73,7 @@ private:
69 bool has_variable_aoffi{}; 73 bool has_variable_aoffi{};
70 bool has_component_indexing_bug{}; 74 bool has_component_indexing_bug{};
71 bool has_precise_bug{}; 75 bool has_precise_bug{};
76 bool has_fast_buffer_sub_data{};
72}; 77};
73 78
74} // namespace OpenGL 79} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index d1e147db8..e560d70d5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -67,7 +67,7 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf
67RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 67RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
68 ScreenInfo& info) 68 ScreenInfo& info)
69 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, 69 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
70 system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} { 70 system{system}, screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
71 shader_program_manager = std::make_unique<GLShader::ProgramManager>(); 71 shader_program_manager = std::make_unique<GLShader::ProgramManager>();
72 state.draw.shader_program = 0; 72 state.draw.shader_program = 0;
73 state.Apply(); 73 state.Apply();
@@ -558,6 +558,8 @@ void RasterizerOpenGL::DrawPrelude() {
558 SyncPolygonOffset(); 558 SyncPolygonOffset();
559 SyncAlphaTest(); 559 SyncAlphaTest();
560 560
561 buffer_cache.Acquire();
562
561 // Draw the vertex batch 563 // Draw the vertex batch
562 const bool is_indexed = accelerate_draw == AccelDraw::Indexed; 564 const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
563 565
@@ -879,7 +881,8 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
879 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); 881 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
880 882
881 const auto alignment = device.GetUniformBufferAlignment(); 883 const auto alignment = device.GetUniformBufferAlignment();
882 const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); 884 const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
885 device.HasFastBufferSubData());
883 bind_ubo_pushbuffer.Push(cbuf, offset, size); 886 bind_ubo_pushbuffer.Push(cbuf, offset, size);
884} 887}
885 888