summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/buffer_cache.h299
-rw-r--r--src/video_core/engines/maxwell_3d.h1
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp110
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h76
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp102
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h82
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp154
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h13
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp2
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp4
-rw-r--r--src/video_core/renderer_opengl/utils.cpp48
-rw-r--r--src/video_core/renderer_opengl/utils.h41
16 files changed, 537 insertions, 407 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index cd32c65d3..7c18c27b3 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,4 +1,5 @@
1add_library(video_core STATIC 1add_library(video_core STATIC
2 buffer_cache.h
2 dma_pusher.cpp 3 dma_pusher.cpp
3 dma_pusher.h 4 dma_pusher.h
4 debug_utils/debug_utils.cpp 5 debug_utils/debug_utils.cpp
@@ -43,8 +44,6 @@ add_library(video_core STATIC
43 renderer_opengl/gl_device.h 44 renderer_opengl/gl_device.h
44 renderer_opengl/gl_framebuffer_cache.cpp 45 renderer_opengl/gl_framebuffer_cache.cpp
45 renderer_opengl/gl_framebuffer_cache.h 46 renderer_opengl/gl_framebuffer_cache.h
46 renderer_opengl/gl_global_cache.cpp
47 renderer_opengl/gl_global_cache.h
48 renderer_opengl/gl_rasterizer.cpp 47 renderer_opengl/gl_rasterizer.cpp
49 renderer_opengl/gl_rasterizer.h 48 renderer_opengl/gl_rasterizer.h
50 renderer_opengl/gl_resource_manager.cpp 49 renderer_opengl/gl_resource_manager.cpp
diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h
new file mode 100644
index 000000000..6f868b8b4
--- /dev/null
+++ b/src/video_core/buffer_cache.h
@@ -0,0 +1,299 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector>
14
15#include "common/alignment.h"
16#include "common/common_types.h"
17#include "core/core.h"
18#include "video_core/memory_manager.h"
19#include "video_core/rasterizer_cache.h"
20
21namespace VideoCore {
22class RasterizerInterface;
23}
24
25namespace VideoCommon {
26
27template <typename BufferStorageType>
28class CachedBuffer final : public RasterizerCacheObject {
29public:
30 explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr)
31 : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {}
32 ~CachedBuffer() override = default;
33
34 VAddr GetCpuAddr() const override {
35 return cpu_addr;
36 }
37
38 std::size_t GetSizeInBytes() const override {
39 return size;
40 }
41
42 u8* GetWritableHostPtr() const {
43 return host_ptr;
44 }
45
46 std::size_t GetSize() const {
47 return size;
48 }
49
50 std::size_t GetCapacity() const {
51 return capacity;
52 }
53
54 bool IsInternalized() const {
55 return is_internal;
56 }
57
58 const BufferStorageType& GetBuffer() const {
59 return buffer;
60 }
61
62 void SetSize(std::size_t new_size) {
63 size = new_size;
64 }
65
66 void SetInternalState(bool is_internal_) {
67 is_internal = is_internal_;
68 }
69
70 BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) {
71 capacity = new_capacity;
72 std::swap(buffer, buffer_);
73 return buffer_;
74 }
75
76private:
77 u8* host_ptr{};
78 VAddr cpu_addr{};
79 std::size_t size{};
80 std::size_t capacity{};
81 bool is_internal{};
82 BufferStorageType buffer;
83};
84
85template <typename BufferStorageType, typename BufferType, typename StreamBuffer>
86class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> {
87public:
88 using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>;
89 using BufferInfo = std::pair<const BufferType*, u64>;
90
91 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
92 std::unique_ptr<StreamBuffer> stream_buffer)
93 : RasterizerCache<Buffer>{rasterizer}, system{system},
94 stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{
95 this->stream_buffer->GetHandle()} {}
96 ~BufferCache() = default;
97
98 void Unregister(const Buffer& entry) override {
99 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
100 if (entry->IsInternalized()) {
101 internalized_entries.erase(entry->GetCacheAddr());
102 }
103 ReserveBuffer(entry);
104 RasterizerCache<Buffer>::Unregister(entry);
105 }
106
107 void TickFrame() {
108 marked_for_destruction_index =
109 (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size();
110 MarkedForDestruction().clear();
111 }
112
113 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
114 bool internalize = false, bool is_written = false) {
115 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
116
117 auto& memory_manager = system.GPU().MemoryManager();
118 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
119 if (!host_ptr) {
120 return {GetEmptyBuffer(size), 0};
121 }
122 const auto cache_addr = ToCacheAddr(host_ptr);
123
124 // Cache management is a big overhead, so only cache entries with a given size.
125 // TODO: Figure out which size is the best for given games.
126 constexpr std::size_t max_stream_size = 0x800;
127 if (!internalize && size < max_stream_size &&
128 internalized_entries.find(cache_addr) == internalized_entries.end()) {
129 return StreamBufferUpload(host_ptr, size, alignment);
130 }
131
132 auto entry = RasterizerCache<Buffer>::TryGet(cache_addr);
133 if (!entry) {
134 return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written);
135 }
136
137 if (entry->GetSize() < size) {
138 IncreaseBufferSize(entry, size);
139 }
140 if (is_written) {
141 entry->MarkAsModified(true, *this);
142 }
143 return {ToHandle(entry->GetBuffer()), 0};
144 }
145
146 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
147 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
148 std::size_t alignment = 4) {
149 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
150 return StreamBufferUpload(raw_pointer, size, alignment);
151 }
152
153 void Map(std::size_t max_size) {
154 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
155 buffer_offset = buffer_offset_base;
156 }
157
158 /// Finishes the upload stream, returns true on bindings invalidation.
159 bool Unmap() {
160 stream_buffer->Unmap(buffer_offset - buffer_offset_base);
161 return std::exchange(invalidated, false);
162 }
163
164 virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0;
165
166protected:
167 void FlushObjectInner(const Buffer& entry) override {
168 DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr());
169 }
170
171 virtual BufferStorageType CreateBuffer(std::size_t size) = 0;
172
173 virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0;
174
175 virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset,
176 std::size_t size, const u8* data) = 0;
177
178 virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset,
179 std::size_t size, u8* data) = 0;
180
181 virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst,
182 std::size_t src_offset, std::size_t dst_offset,
183 std::size_t size) = 0;
184
185private:
186 BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
187 std::size_t alignment) {
188 AlignBuffer(alignment);
189 const std::size_t uploaded_offset = buffer_offset;
190 std::memcpy(buffer_ptr, raw_pointer, size);
191
192 buffer_ptr += size;
193 buffer_offset += size;
194 return {&stream_buffer_handle, uploaded_offset};
195 }
196
197 BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size,
198 bool internalize, bool is_written) {
199 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
200 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
201 ASSERT(cpu_addr);
202
203 auto entry = GetUncachedBuffer(*cpu_addr, host_ptr);
204 entry->SetSize(size);
205 entry->SetInternalState(internalize);
206 RasterizerCache<Buffer>::Register(entry);
207
208 if (internalize) {
209 internalized_entries.emplace(ToCacheAddr(host_ptr));
210 }
211 if (is_written) {
212 entry->MarkAsModified(true, *this);
213 }
214
215 if (entry->GetCapacity() < size) {
216 MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size));
217 }
218
219 UploadBufferData(entry->GetBuffer(), 0, size, host_ptr);
220 return {ToHandle(entry->GetBuffer()), 0};
221 }
222
223 void IncreaseBufferSize(Buffer& entry, std::size_t new_size) {
224 const std::size_t old_size = entry->GetSize();
225 if (entry->GetCapacity() < new_size) {
226 const auto& old_buffer = entry->GetBuffer();
227 auto new_buffer = CreateBuffer(new_size);
228
229 // Copy bits from the old buffer to the new buffer.
230 CopyBufferData(old_buffer, new_buffer, 0, 0, old_size);
231 MarkedForDestruction().push_back(
232 entry->ExchangeBuffer(std::move(new_buffer), new_size));
233
234 // This buffer could have been used
235 invalidated = true;
236 }
237 // Upload the new bits.
238 const std::size_t size_diff = new_size - old_size;
239 UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size);
240
241 // Update entry's size in the object and in the cache.
242 Unregister(entry);
243
244 entry->SetSize(new_size);
245 RasterizerCache<Buffer>::Register(entry);
246 }
247
248 Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) {
249 if (auto entry = TryGetReservedBuffer(host_ptr)) {
250 return entry;
251 }
252 return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr);
253 }
254
255 Buffer TryGetReservedBuffer(u8* host_ptr) {
256 const auto it = buffer_reserve.find(ToCacheAddr(host_ptr));
257 if (it == buffer_reserve.end()) {
258 return {};
259 }
260 auto& reserve = it->second;
261 auto entry = reserve.back();
262 reserve.pop_back();
263 return entry;
264 }
265
266 void ReserveBuffer(Buffer entry) {
267 buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry));
268 }
269
270 void AlignBuffer(std::size_t alignment) {
271 // Align the offset, not the mapped pointer
272 const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
273 buffer_ptr += offset_aligned - buffer_offset;
274 buffer_offset = offset_aligned;
275 }
276
277 std::vector<BufferStorageType>& MarkedForDestruction() {
278 return marked_for_destruction_ring_buffer[marked_for_destruction_index];
279 }
280
281 Core::System& system;
282
283 std::unique_ptr<StreamBuffer> stream_buffer;
284 BufferType stream_buffer_handle{};
285
286 bool invalidated = false;
287
288 u8* buffer_ptr = nullptr;
289 u64 buffer_offset = 0;
290 u64 buffer_offset_base = 0;
291
292 std::size_t marked_for_destruction_index = 0;
293 std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer;
294
295 std::unordered_set<CacheAddr> internalized_entries;
296 std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve;
297};
298
299} // namespace VideoCommon
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 13e314944..8d15c8a48 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -67,6 +67,7 @@ public:
67 static constexpr std::size_t MaxShaderStage = 5; 67 static constexpr std::size_t MaxShaderStage = 5;
68 // Maximum number of const buffers per shader stage. 68 // Maximum number of const buffers per shader stage.
69 static constexpr std::size_t MaxConstBuffers = 18; 69 static constexpr std::size_t MaxConstBuffers = 18;
70 static constexpr std::size_t MaxConstBufferSize = 0x10000;
70 71
71 enum class QueryMode : u32 { 72 enum class QueryMode : u32 {
72 Write = 0, 73 Write = 0,
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 5ee4f8e8e..2b7367568 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -47,6 +47,9 @@ public:
47 /// and invalidated 47 /// and invalidated
48 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 48 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
49 49
50 /// Notify rasterizer that a frame is about to finish
51 virtual void TickFrame() = 0;
52
50 /// Attempt to use a faster method to perform a surface copy 53 /// Attempt to use a faster method to perform a surface copy
51 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 54 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
52 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 55 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 2b9bd142e..2a9b523f5 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,103 +2,57 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring>
6#include <memory> 5#include <memory>
7 6
8#include "common/alignment.h" 7#include <glad/glad.h>
9#include "core/core.h" 8
10#include "video_core/memory_manager.h" 9#include "common/assert.h"
11#include "video_core/renderer_opengl/gl_buffer_cache.h" 10#include "video_core/renderer_opengl/gl_buffer_cache.h"
12#include "video_core/renderer_opengl/gl_rasterizer.h" 11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h"
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, 16OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
17 std::size_t alignment, u8* host_ptr) 17 std::size_t stream_size)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, 18 : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{
19 alignment{alignment} {} 19 rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
20
21OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
22 : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
23
24GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
25 bool cache) {
26 std::lock_guard lock{mutex};
27 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
28
29 // Cache management is a big overhead, so only cache entries with a given size.
30 // TODO: Figure out which size is the best for given games.
31 cache &= size >= 2048;
32
33 const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
34 if (cache) {
35 auto entry = TryGet(host_ptr);
36 if (entry) {
37 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
38 return entry->GetOffset();
39 }
40 Unregister(entry);
41 }
42 }
43 20
44 AlignBuffer(alignment); 21OGLBufferCache::~OGLBufferCache() = default;
45 const GLintptr uploaded_offset = buffer_offset;
46 22
47 if (!host_ptr) { 23OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) {
48 return uploaded_offset; 24 OGLBuffer buffer;
49 } 25 buffer.Create();
50 26 glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
51 std::memcpy(buffer_ptr, host_ptr, size); 27 return buffer;
52 buffer_ptr += size;
53 buffer_offset += size;
54
55 if (cache) {
56 auto entry = std::make_shared<CachedBufferEntry>(
57 *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
58 Register(entry);
59 }
60
61 return uploaded_offset;
62} 28}
63 29
64GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, 30const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) {
65 std::size_t alignment) { 31 return &buffer.handle;
66 std::lock_guard lock{mutex};
67 AlignBuffer(alignment);
68 std::memcpy(buffer_ptr, raw_pointer, size);
69 const GLintptr uploaded_offset = buffer_offset;
70
71 buffer_ptr += size;
72 buffer_offset += size;
73 return uploaded_offset;
74} 32}
75 33
76bool OGLBufferCache::Map(std::size_t max_size) { 34const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
77 bool invalidate; 35 static const GLuint null_buffer = 0;
78 std::tie(buffer_ptr, buffer_offset_base, invalidate) = 36 return &null_buffer;
79 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
80 buffer_offset = buffer_offset_base;
81
82 if (invalidate) {
83 InvalidateAll();
84 }
85 return invalidate;
86} 37}
87 38
88void OGLBufferCache::Unmap() { 39void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
89 stream_buffer.Unmap(buffer_offset - buffer_offset_base); 40 const u8* data) {
41 glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
42 static_cast<GLsizeiptr>(size), data);
90} 43}
91 44
92GLuint OGLBufferCache::GetHandle() const { 45void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset,
93 return stream_buffer.GetHandle(); 46 std::size_t size, u8* data) {
47 glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
48 static_cast<GLsizeiptr>(size), data);
94} 49}
95 50
96void OGLBufferCache::AlignBuffer(std::size_t alignment) { 51void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst,
97 // Align the offset, not the mapped pointer 52 std::size_t src_offset, std::size_t dst_offset,
98 const GLintptr offset_aligned = 53 std::size_t size) {
99 static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); 54 glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset),
100 buffer_ptr += offset_aligned - buffer_offset; 55 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
101 buffer_offset = offset_aligned;
102} 56}
103 57
104} // namespace OpenGL 58} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index f2347581b..8c8ac4038 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -4,80 +4,44 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef>
8#include <memory> 7#include <memory>
9#include <tuple>
10 8
11#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache.h"
12#include "video_core/rasterizer_cache.h" 11#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_stream_buffer.h" 13#include "video_core/renderer_opengl/gl_stream_buffer.h"
15 14
15namespace Core {
16class System;
17}
18
16namespace OpenGL { 19namespace OpenGL {
17 20
21class OGLStreamBuffer;
18class RasterizerOpenGL; 22class RasterizerOpenGL;
19 23
20class CachedBufferEntry final : public RasterizerCacheObject { 24class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> {
21public:
22 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
23 std::size_t alignment, u8* host_ptr);
24
25 VAddr GetCpuAddr() const override {
26 return cpu_addr;
27 }
28
29 std::size_t GetSizeInBytes() const override {
30 return size;
31 }
32
33 std::size_t GetSize() const {
34 return size;
35 }
36
37 GLintptr GetOffset() const {
38 return offset;
39 }
40
41 std::size_t GetAlignment() const {
42 return alignment;
43 }
44
45private:
46 VAddr cpu_addr{};
47 std::size_t size{};
48 GLintptr offset{};
49 std::size_t alignment{};
50};
51
52class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
53public: 25public:
54 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); 26 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
55 27 std::size_t stream_size);
56 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been 28 ~OGLBufferCache();
57 /// allocated.
58 GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
59 bool cache = true);
60 29
61 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. 30 const GLuint* GetEmptyBuffer(std::size_t) override;
62 GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
63
64 bool Map(std::size_t max_size);
65 void Unmap();
66
67 GLuint GetHandle() const;
68 31
69protected: 32protected:
70 void AlignBuffer(std::size_t alignment); 33 OGLBuffer CreateBuffer(std::size_t size) override;
34
35 const GLuint* ToHandle(const OGLBuffer& buffer) override;
71 36
72 // We do not have to flush this cache as things in it are never modified by us. 37 void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
73 void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} 38 const u8* data) override;
74 39
75private: 40 void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
76 OGLStreamBuffer stream_buffer; 41 u8* data) override;
77 42
78 u8* buffer_ptr = nullptr; 43 void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset,
79 GLintptr buffer_offset = 0; 44 std::size_t dst_offset, std::size_t size) override;
80 GLintptr buffer_offset_base = 0;
81}; 45};
82 46
83} // namespace OpenGL 47} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index a48e14d2e..6238ddaaa 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -24,6 +24,7 @@ T GetInteger(GLenum pname) {
24 24
25Device::Device() { 25Device::Device() {
26 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 26 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
27 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
27 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 28 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
28 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 29 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
29 has_variable_aoffi = TestVariableAoffi(); 30 has_variable_aoffi = TestVariableAoffi();
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8c8c93760..939edb440 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -18,6 +18,10 @@ public:
18 return uniform_buffer_alignment; 18 return uniform_buffer_alignment;
19 } 19 }
20 20
21 std::size_t GetShaderStorageBufferAlignment() const {
22 return shader_storage_alignment;
23 }
24
21 u32 GetMaxVertexAttributes() const { 25 u32 GetMaxVertexAttributes() const {
22 return max_vertex_attributes; 26 return max_vertex_attributes;
23 } 27 }
@@ -39,6 +43,7 @@ private:
39 static bool TestComponentIndexingBug(); 43 static bool TestComponentIndexingBug();
40 44
41 std::size_t uniform_buffer_alignment{}; 45 std::size_t uniform_buffer_alignment{};
46 std::size_t shader_storage_alignment{};
42 u32 max_vertex_attributes{}; 47 u32 max_vertex_attributes{};
43 u32 max_varyings{}; 48 u32 max_varyings{};
44 bool has_variable_aoffi{}; 49 bool has_variable_aoffi{};
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
deleted file mode 100644
index d5e385151..000000000
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <glad/glad.h>
6
7#include "common/logging/log.h"
8#include "core/core.h"
9#include "video_core/memory_manager.h"
10#include "video_core/renderer_opengl/gl_global_cache.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
13#include "video_core/renderer_opengl/utils.h"
14
15namespace OpenGL {
16
17CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
19 max_size{max_size} {
20 buffer.Create();
21 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
22}
23
24CachedGlobalRegion::~CachedGlobalRegion() = default;
25
26void CachedGlobalRegion::Reload(u32 size_) {
27 size = size_;
28 if (size > max_size) {
29 size = max_size;
30 LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
31 max_size);
32 }
33 glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
34}
35
36void CachedGlobalRegion::Flush() {
37 LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr);
38 glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
39}
40
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
42 const auto search{reserve.find(addr)};
43 if (search == reserve.end()) {
44 return {};
45 }
46 return search->second;
47}
48
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
50 u32 size) {
51 GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
52 if (!region) {
53 // No reserved surface available, create a new one and reserve it
54 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
55 const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
56 ASSERT(cpu_addr);
57
58 region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
59 ReserveGlobalRegion(region);
60 }
61 region->Reload(size);
62 return region;
63}
64
65void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
66 reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
67}
68
69GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
70 : RasterizerCache{rasterizer} {
71 GLint max_ssbo_size_;
72 glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
73 max_ssbo_size = static_cast<u32>(max_ssbo_size_);
74}
75
76GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
77 const GLShader::GlobalMemoryEntry& global_region,
78 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
79 std::lock_guard lock{mutex};
80
81 auto& gpu{Core::System::GetInstance().GPU()};
82 auto& memory_manager{gpu.MemoryManager()};
83 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
84 const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
85 global_region.GetCbufOffset()};
86 const auto actual_addr{memory_manager.Read<u64>(addr)};
87 const auto size{memory_manager.Read<u32>(addr + 8)};
88
89 // Look up global region in the cache based on address
90 const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
91 GlobalRegion region{TryGet(host_ptr)};
92
93 if (!region) {
94 // No global region found - create a new one
95 region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
96 Register(region);
97 }
98
99 return region;
100}
101
102} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
deleted file mode 100644
index 2d467a240..000000000
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ /dev/null
@@ -1,82 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9
10#include <glad/glad.h>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/rasterizer_cache.h"
16#include "video_core/renderer_opengl/gl_resource_manager.h"
17
18namespace OpenGL {
19
20namespace GLShader {
21class GlobalMemoryEntry;
22}
23
24class RasterizerOpenGL;
25class CachedGlobalRegion;
26using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
27
28class CachedGlobalRegion final : public RasterizerCacheObject {
29public:
30 explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
31 ~CachedGlobalRegion();
32
33 VAddr GetCpuAddr() const override {
34 return cpu_addr;
35 }
36
37 std::size_t GetSizeInBytes() const override {
38 return size;
39 }
40
41 /// Gets the GL program handle for the buffer
42 GLuint GetBufferHandle() const {
43 return buffer.handle;
44 }
45
46 /// Reloads the global region from guest memory
47 void Reload(u32 size_);
48
49 void Flush();
50
51private:
52 VAddr cpu_addr{};
53 u8* host_ptr{};
54 u32 size{};
55 u32 max_size{};
56
57 OGLBuffer buffer;
58};
59
60class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
61public:
62 explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
63
64 /// Gets the current specified shader stage program
65 GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
66 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
67
68protected:
69 void FlushObjectInner(const GlobalRegion& object) override {
70 object->Flush();
71 }
72
73private:
74 GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
75 GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
76 void ReserveGlobalRegion(GlobalRegion region);
77
78 std::unordered_map<CacheAddr, GlobalRegion> reserve;
79 u32 max_ssbo_size{};
80};
81
82} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f45a3c5ef..0bb5c068c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -20,6 +20,7 @@
20#include "core/hle/kernel/process.h" 20#include "core/hle/kernel/process.h"
21#include "core/settings.h" 21#include "core/settings.h"
22#include "video_core/engines/maxwell_3d.h" 22#include "video_core/engines/maxwell_3d.h"
23#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 24#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_cache.h" 25#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_gen.h" 26#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -80,11 +81,25 @@ struct DrawParameters {
80 } 81 }
81}; 82};
82 83
84static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
85 const GLShader::ConstBufferEntry& entry) {
86 if (!entry.IsIndirect()) {
87 return entry.GetSize();
88 }
89
90 if (buffer.size > Maxwell::MaxConstBufferSize) {
91 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
92 Maxwell::MaxConstBufferSize);
93 return Maxwell::MaxConstBufferSize;
94 }
95
96 return buffer.size;
97}
98
83RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 99RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
84 ScreenInfo& info) 100 ScreenInfo& info)
85 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, 101 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
86 global_cache{*this}, system{system}, screen_info{info}, 102 system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
87 buffer_cache(*this, STREAM_BUFFER_SIZE) {
88 OpenGLState::ApplyDefaultState(); 103 OpenGLState::ApplyDefaultState();
89 104
90 shader_program_manager = std::make_unique<GLShader::ProgramManager>(); 105 shader_program_manager = std::make_unique<GLShader::ProgramManager>();
@@ -129,8 +144,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
129 state.draw.vertex_array = vao; 144 state.draw.vertex_array = vao;
130 state.ApplyVertexArrayState(); 145 state.ApplyVertexArrayState();
131 146
132 glVertexArrayElementBuffer(vao, buffer_cache.GetHandle());
133
134 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. 147 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
135 // Enables the first 16 vertex attributes always, as we don't know which ones are actually 148 // Enables the first 16 vertex attributes always, as we don't know which ones are actually
136 // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 149 // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
@@ -197,11 +210,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
197 210
198 ASSERT(end > start); 211 ASSERT(end > start);
199 const u64 size = end - start + 1; 212 const u64 size = end - start + 1;
200 const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); 213 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
201 214
202 // Bind the vertex array to the buffer at the current offset. 215 // Bind the vertex array to the buffer at the current offset.
203 glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset, 216 vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset,
204 vertex_array.stride); 217 vertex_array.stride);
205 218
206 if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { 219 if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
207 // Enable vertex buffer instancing with the specified divisor. 220 // Enable vertex buffer instancing with the specified divisor.
@@ -215,7 +228,19 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
215 gpu.dirty_flags.vertex_array.reset(); 228 gpu.dirty_flags.vertex_array.reset();
216} 229}
217 230
218DrawParameters RasterizerOpenGL::SetupDraw() { 231GLintptr RasterizerOpenGL::SetupIndexBuffer() {
232 if (accelerate_draw != AccelDraw::Indexed) {
233 return 0;
234 }
235 MICROPROFILE_SCOPE(OpenGL_Index);
236 const auto& regs = system.GPU().Maxwell3D().regs;
237 const std::size_t size = CalculateIndexBufferSize();
238 const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
239 vertex_array_pushbuffer.SetIndexBuffer(buffer);
240 return offset;
241}
242
243DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) {
219 const auto& gpu = system.GPU().Maxwell3D(); 244 const auto& gpu = system.GPU().Maxwell3D();
220 const auto& regs = gpu.regs; 245 const auto& regs = gpu.regs;
221 const bool is_indexed = accelerate_draw == AccelDraw::Indexed; 246 const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
@@ -227,11 +252,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
227 params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); 252 params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
228 253
229 if (is_indexed) { 254 if (is_indexed) {
230 MICROPROFILE_SCOPE(OpenGL_Index);
231 params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); 255 params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
232 params.count = regs.index_array.count; 256 params.count = regs.index_array.count;
233 params.index_buffer_offset = 257 params.index_buffer_offset = index_buffer_offset;
234 buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
235 params.base_vertex = static_cast<GLint>(regs.vb_element_base); 258 params.base_vertex = static_cast<GLint>(regs.vb_element_base);
236 } else { 259 } else {
237 params.count = regs.vertex_buffer.count; 260 params.count = regs.vertex_buffer.count;
@@ -247,10 +270,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
247 BaseBindings base_bindings; 270 BaseBindings base_bindings;
248 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 271 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
249 272
250 // Prepare packed bindings
251 bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
252 bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
253
254 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 273 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
255 const auto& shader_config = gpu.regs.shader_config[index]; 274 const auto& shader_config = gpu.regs.shader_config[index];
256 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; 275 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -271,12 +290,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
271 290
272 GLShader::MaxwellUniformData ubo{}; 291 GLShader::MaxwellUniformData ubo{};
273 ubo.SetFromRegs(gpu, stage); 292 ubo.SetFromRegs(gpu, stage);
274 const GLintptr offset = 293 const auto [buffer, offset] =
275 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); 294 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
276 295
277 // Bind the emulation info buffer 296 // Bind the emulation info buffer
278 bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, 297 bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
279 static_cast<GLsizeiptr>(sizeof(ubo)));
280 298
281 Shader shader{shader_cache.GetStageProgram(program)}; 299 Shader shader{shader_cache.GetStageProgram(program)};
282 300
@@ -321,9 +339,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
321 base_bindings = next_bindings; 339 base_bindings = next_bindings;
322 } 340 }
323 341
324 bind_ubo_pushbuffer.Bind();
325 bind_ssbo_pushbuffer.Bind();
326
327 SyncClipEnabled(clip_distances); 342 SyncClipEnabled(clip_distances);
328 343
329 gpu.dirty_flags.shaders = false; 344 gpu.dirty_flags.shaders = false;
@@ -634,26 +649,46 @@ void RasterizerOpenGL::DrawArrays() {
634 Maxwell::MaxShaderStage; 649 Maxwell::MaxShaderStage;
635 650
636 // Add space for at least 18 constant buffers 651 // Add space for at least 18 constant buffers
637 buffer_size += 652 buffer_size += Maxwell::MaxConstBuffers *
638 Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); 653 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
639 654
640 const bool invalidate = buffer_cache.Map(buffer_size); 655 // Prepare the vertex array.
641 if (invalidate) { 656 buffer_cache.Map(buffer_size);
642 // As all cached buffers are invalidated, we need to recheck their state.
643 gpu.dirty_flags.vertex_array.set();
644 }
645 657
658 // Prepare vertex array format.
646 const GLuint vao = SetupVertexFormat(); 659 const GLuint vao = SetupVertexFormat();
660 vertex_array_pushbuffer.Setup(vao);
661
662 // Upload vertex and index data.
647 SetupVertexBuffer(vao); 663 SetupVertexBuffer(vao);
664 const GLintptr index_buffer_offset = SetupIndexBuffer();
648 665
649 DrawParameters params = SetupDraw(); 666 // Setup draw parameters. It will automatically choose what glDraw* method to use.
667 const DrawParameters params = SetupDraw(index_buffer_offset);
668
669 // Prepare packed bindings.
670 bind_ubo_pushbuffer.Setup(0);
671 bind_ssbo_pushbuffer.Setup(0);
672
673 // Setup shaders and their used resources.
650 texture_cache.GuardSamplers(true); 674 texture_cache.GuardSamplers(true);
651 SetupShaders(params.primitive_mode); 675 SetupShaders(params.primitive_mode);
652 texture_cache.GuardSamplers(false); 676 texture_cache.GuardSamplers(false);
653 677
654 ConfigureFramebuffers(state); 678 ConfigureFramebuffers(state);
655 679
656 buffer_cache.Unmap(); 680 // Signal the buffer cache that we are not going to upload more things.
681 const bool invalidate = buffer_cache.Unmap();
682
683 // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
684 vertex_array_pushbuffer.Bind();
685 bind_ubo_pushbuffer.Bind();
686 bind_ssbo_pushbuffer.Bind();
687
688 if (invalidate) {
689 // As all cached buffers are invalidated, we need to recheck their state.
690 gpu.dirty_flags.vertex_array.set();
691 }
657 692
658 shader_program_manager->ApplyTo(state); 693 shader_program_manager->ApplyTo(state);
659 state.Apply(); 694 state.Apply();
@@ -675,7 +710,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
675 return; 710 return;
676 } 711 }
677 texture_cache.FlushRegion(addr, size); 712 texture_cache.FlushRegion(addr, size);
678 global_cache.FlushRegion(addr, size); 713 buffer_cache.FlushRegion(addr, size);
679} 714}
680 715
681void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { 716void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -685,7 +720,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
685 } 720 }
686 texture_cache.InvalidateRegion(addr, size); 721 texture_cache.InvalidateRegion(addr, size);
687 shader_cache.InvalidateRegion(addr, size); 722 shader_cache.InvalidateRegion(addr, size);
688 global_cache.InvalidateRegion(addr, size);
689 buffer_cache.InvalidateRegion(addr, size); 723 buffer_cache.InvalidateRegion(addr, size);
690} 724}
691 725
@@ -696,6 +730,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
696 InvalidateRegion(addr, size); 730 InvalidateRegion(addr, size);
697} 731}
698 732
733void RasterizerOpenGL::TickFrame() {
734 buffer_cache.TickFrame();
735}
736
699bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 737bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
700 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 738 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
701 const Tegra::Engines::Fermi2D::Config& copy_config) { 739 const Tegra::Engines::Fermi2D::Config& copy_config) {
@@ -739,11 +777,9 @@ void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::Sh
739 MICROPROFILE_SCOPE(OpenGL_UBO); 777 MICROPROFILE_SCOPE(OpenGL_UBO);
740 const auto stage_index = static_cast<std::size_t>(stage); 778 const auto stage_index = static_cast<std::size_t>(stage);
741 const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; 779 const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index];
742 const auto& entries = shader->GetShaderEntries().const_buffers;
743 780
744 // Upload only the enabled buffers from the 16 constbuffers of each shader stage 781 // Upload only the enabled buffers from the 16 constbuffers of each shader stage
745 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 782 for (const auto& entry : shader->GetShaderEntries().const_buffers) {
746 const auto& entry = entries[bindpoint];
747 SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); 783 SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry);
748 } 784 }
749} 785}
@@ -752,46 +788,34 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
752 const GLShader::ConstBufferEntry& entry) { 788 const GLShader::ConstBufferEntry& entry) {
753 if (!buffer.enabled) { 789 if (!buffer.enabled) {
754 // Set values to zero to unbind buffers 790 // Set values to zero to unbind buffers
755 bind_ubo_pushbuffer.Push(0, 0, 0); 791 bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
756 return; 792 return;
757 } 793 }
758 794
759 std::size_t size;
760 if (entry.IsIndirect()) {
761 // Buffer is accessed indirectly, so upload the entire thing
762 size = buffer.size;
763
764 if (size > MaxConstbufferSize) {
765 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
766 MaxConstbufferSize);
767 size = MaxConstbufferSize;
768 }
769 } else {
770 // Buffer is accessed directly, upload just what we use
771 size = entry.GetSize();
772 }
773
774 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 795 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
775 // UBO alignment requirements. 796 // UBO alignment requirements.
776 size = Common::AlignUp(size, sizeof(GLvec4)); 797 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
777 ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
778 798
779 const std::size_t alignment = device.GetUniformBufferAlignment(); 799 const auto alignment = device.GetUniformBufferAlignment();
780 const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); 800 const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
781 bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); 801 bind_ubo_pushbuffer.Push(cbuf, offset, size);
782} 802}
783 803
784void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 804void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
785 const Shader& shader) { 805 const Shader& shader) {
786 const auto& entries = shader->GetShaderEntries().global_memory_entries; 806 auto& gpu{system.GPU()};
787 for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 807 auto& memory_manager{gpu.MemoryManager()};
788 const auto& entry{entries[bindpoint]}; 808 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
789 const auto& region{global_cache.GetGlobalRegion(entry, stage)}; 809 const auto alignment{device.GetShaderStorageBufferAlignment()};
790 if (entry.IsWritten()) { 810
791 region->MarkAsModified(true, global_cache); 811 for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
792 } 812 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
793 bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, 813 const auto actual_addr{memory_manager.Read<u64>(addr)};
794 static_cast<GLsizeiptr>(region->GetSizeInBytes())); 814 const auto size{memory_manager.Read<u32>(addr + 8)};
815
816 const auto [ssbo, buffer_offset] =
817 buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten());
818 bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
795 } 819 }
796} 820}
797 821
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d238c1257..40b571d58 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -24,7 +24,6 @@
24#include "video_core/renderer_opengl/gl_buffer_cache.h" 24#include "video_core/renderer_opengl/gl_buffer_cache.h"
25#include "video_core/renderer_opengl/gl_device.h" 25#include "video_core/renderer_opengl/gl_device.h"
26#include "video_core/renderer_opengl/gl_framebuffer_cache.h" 26#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
27#include "video_core/renderer_opengl/gl_global_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h" 27#include "video_core/renderer_opengl/gl_resource_manager.h"
29#include "video_core/renderer_opengl/gl_sampler_cache.h" 28#include "video_core/renderer_opengl/gl_sampler_cache.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 29#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -63,6 +62,7 @@ public:
63 void FlushRegion(CacheAddr addr, u64 size) override; 62 void FlushRegion(CacheAddr addr, u64 size) override;
64 void InvalidateRegion(CacheAddr addr, u64 size) override; 63 void InvalidateRegion(CacheAddr addr, u64 size) override;
65 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 64 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
65 void TickFrame() override;
66 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 66 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
67 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 67 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
68 const Tegra::Engines::Fermi2D::Config& copy_config) override; 68 const Tegra::Engines::Fermi2D::Config& copy_config) override;
@@ -73,11 +73,6 @@ public:
73 void LoadDiskResources(const std::atomic_bool& stop_loading, 73 void LoadDiskResources(const std::atomic_bool& stop_loading,
74 const VideoCore::DiskResourceLoadCallback& callback) override; 74 const VideoCore::DiskResourceLoadCallback& callback) override;
75 75
76 /// Maximum supported size that a constbuffer can have in bytes.
77 static constexpr std::size_t MaxConstbufferSize = 0x10000;
78 static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
79 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
80
81private: 76private:
82 struct FramebufferConfigState { 77 struct FramebufferConfigState {
83 bool using_color_fb{}; 78 bool using_color_fb{};
@@ -191,7 +186,6 @@ private:
191 186
192 TextureCacheOpenGL texture_cache; 187 TextureCacheOpenGL texture_cache;
193 ShaderCacheOpenGL shader_cache; 188 ShaderCacheOpenGL shader_cache;
194 GlobalRegionCacheOpenGL global_cache;
195 SamplerCacheOpenGL sampler_cache; 189 SamplerCacheOpenGL sampler_cache;
196 FramebufferCacheOpenGL framebuffer_cache; 190 FramebufferCacheOpenGL framebuffer_cache;
197 191
@@ -210,6 +204,7 @@ private:
210 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 204 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
211 OGLBufferCache buffer_cache; 205 OGLBufferCache buffer_cache;
212 206
207 VertexArrayPushBuffer vertex_array_pushbuffer;
213 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; 208 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
214 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; 209 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
215 210
@@ -222,7 +217,9 @@ private:
222 217
223 void SetupVertexBuffer(GLuint vao); 218 void SetupVertexBuffer(GLuint vao);
224 219
225 DrawParameters SetupDraw(); 220 GLintptr SetupIndexBuffer();
221
222 DrawParameters SetupDraw(GLintptr index_buffer_offset);
226 223
227 void SetupShaders(GLenum primitive_mode); 224 void SetupShaders(GLenum primitive_mode);
228 225
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index bfc975a04..47cc2011f 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -46,7 +46,7 @@ using TextureArgument = std::pair<Type, Node>;
46using TextureIR = std::variant<TextureAoffi, TextureArgument>; 46using TextureIR = std::variant<TextureAoffi, TextureArgument>;
47 47
48constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 48constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
49 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); 49 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
50 50
51class ShaderWriter { 51class ShaderWriter {
52public: 52public:
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b142521ec..9ecdddb0d 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -101,7 +101,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
101 101
102RendererOpenGL::~RendererOpenGL() = default; 102RendererOpenGL::~RendererOpenGL() = default;
103 103
104/// Swap buffers (render frame)
105void RendererOpenGL::SwapBuffers( 104void RendererOpenGL::SwapBuffers(
106 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 105 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
107 106
@@ -130,6 +129,8 @@ void RendererOpenGL::SwapBuffers(
130 129
131 DrawScreen(render_window.GetFramebufferLayout()); 130 DrawScreen(render_window.GetFramebufferLayout());
132 131
132 rasterizer->TickFrame();
133
133 render_window.SwapBuffers(); 134 render_window.SwapBuffers();
134 } 135 }
135 136
@@ -262,7 +263,6 @@ void RendererOpenGL::CreateRasterizer() {
262 if (rasterizer) { 263 if (rasterizer) {
263 return; 264 return;
264 } 265 }
265 // Initialize sRGB Usage
266 OpenGLState::ClearsRGBUsed(); 266 OpenGLState::ClearsRGBUsed();
267 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); 267 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
268} 268}
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index 68c36988d..c504a2c1a 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -13,29 +13,67 @@
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
17
18VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
19
20void VertexArrayPushBuffer::Setup(GLuint vao_) {
21 vao = vao_;
22 index_buffer = nullptr;
23 vertex_buffers.clear();
24}
25
26void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) {
27 index_buffer = buffer;
28}
29
30void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer,
31 GLintptr offset, GLsizei stride) {
32 vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride});
33}
34
35void VertexArrayPushBuffer::Bind() {
36 if (index_buffer) {
37 glVertexArrayElementBuffer(vao, *index_buffer);
38 }
39
40 // TODO(Rodrigo): Find a way to ARB_multi_bind this
41 for (const auto& entry : vertex_buffers) {
42 glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset,
43 entry.stride);
44 }
45}
46
16BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} 47BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
17 48
18BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; 49BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
19 50
20void BindBuffersRangePushBuffer::Setup(GLuint first_) { 51void BindBuffersRangePushBuffer::Setup(GLuint first_) {
21 first = first_; 52 first = first_;
22 buffers.clear(); 53 buffer_pointers.clear();
23 offsets.clear(); 54 offsets.clear();
24 sizes.clear(); 55 sizes.clear();
25} 56}
26 57
27void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { 58void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) {
28 buffers.push_back(buffer); 59 buffer_pointers.push_back(buffer);
29 offsets.push_back(offset); 60 offsets.push_back(offset);
30 sizes.push_back(size); 61 sizes.push_back(size);
31} 62}
32 63
33void BindBuffersRangePushBuffer::Bind() const { 64void BindBuffersRangePushBuffer::Bind() {
34 const std::size_t count{buffers.size()}; 65 // Ensure sizes are valid.
66 const std::size_t count{buffer_pointers.size()};
35 DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); 67 DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
36 if (count == 0) { 68 if (count == 0) {
37 return; 69 return;
38 } 70 }
71
72 // Dereference buffers.
73 buffers.resize(count);
74 std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
75 [](const GLuint* pointer) { return *pointer; });
76
39 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), 77 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
40 sizes.data()); 78 sizes.data());
41} 79}
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 4a752f3b4..6c2b45546 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -11,20 +11,49 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class BindBuffersRangePushBuffer { 14class VertexArrayPushBuffer final {
15public: 15public:
16 BindBuffersRangePushBuffer(GLenum target); 16 explicit VertexArrayPushBuffer();
17 ~VertexArrayPushBuffer();
18
19 void Setup(GLuint vao_);
20
21 void SetIndexBuffer(const GLuint* buffer);
22
23 void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset,
24 GLsizei stride);
25
26 void Bind();
27
28private:
29 struct Entry {
30 GLuint binding_index{};
31 const GLuint* buffer{};
32 GLintptr offset{};
33 GLsizei stride{};
34 };
35
36 GLuint vao{};
37 const GLuint* index_buffer{};
38 std::vector<Entry> vertex_buffers;
39};
40
41class BindBuffersRangePushBuffer final {
42public:
43 explicit BindBuffersRangePushBuffer(GLenum target);
17 ~BindBuffersRangePushBuffer(); 44 ~BindBuffersRangePushBuffer();
18 45
19 void Setup(GLuint first_); 46 void Setup(GLuint first_);
20 47
21 void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); 48 void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size);
22 49
23 void Bind() const; 50 void Bind();
24 51
25private: 52private:
26 GLenum target; 53 GLenum target{};
27 GLuint first; 54 GLuint first{};
55 std::vector<const GLuint*> buffer_pointers;
56
28 std::vector<GLuint> buffers; 57 std::vector<GLuint> buffers;
29 std::vector<GLintptr> offsets; 58 std::vector<GLintptr> offsets;
30 std::vector<GLsizeiptr> sizes; 59 std::vector<GLsizeiptr> sizes;