diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | src/video_core/buffer_cache.h | 299 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 1 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 110 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 76 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.cpp | 102 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.h | 82 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 154 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 13 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/utils.cpp | 48 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/utils.h | 41 |
16 files changed, 537 insertions, 407 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cd32c65d3..7c18c27b3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | add_library(video_core STATIC | 1 | add_library(video_core STATIC |
| 2 | buffer_cache.h | ||
| 2 | dma_pusher.cpp | 3 | dma_pusher.cpp |
| 3 | dma_pusher.h | 4 | dma_pusher.h |
| 4 | debug_utils/debug_utils.cpp | 5 | debug_utils/debug_utils.cpp |
| @@ -43,8 +44,6 @@ add_library(video_core STATIC | |||
| 43 | renderer_opengl/gl_device.h | 44 | renderer_opengl/gl_device.h |
| 44 | renderer_opengl/gl_framebuffer_cache.cpp | 45 | renderer_opengl/gl_framebuffer_cache.cpp |
| 45 | renderer_opengl/gl_framebuffer_cache.h | 46 | renderer_opengl/gl_framebuffer_cache.h |
| 46 | renderer_opengl/gl_global_cache.cpp | ||
| 47 | renderer_opengl/gl_global_cache.h | ||
| 48 | renderer_opengl/gl_rasterizer.cpp | 47 | renderer_opengl/gl_rasterizer.cpp |
| 49 | renderer_opengl/gl_rasterizer.h | 48 | renderer_opengl/gl_rasterizer.h |
| 50 | renderer_opengl/gl_resource_manager.cpp | 49 | renderer_opengl/gl_resource_manager.cpp |
diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h new file mode 100644 index 000000000..6f868b8b4 --- /dev/null +++ b/src/video_core/buffer_cache.h | |||
| @@ -0,0 +1,299 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <mutex> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "core/core.h" | ||
| 18 | #include "video_core/memory_manager.h" | ||
| 19 | #include "video_core/rasterizer_cache.h" | ||
| 20 | |||
| 21 | namespace VideoCore { | ||
| 22 | class RasterizerInterface; | ||
| 23 | } | ||
| 24 | |||
| 25 | namespace VideoCommon { | ||
| 26 | |||
| 27 | template <typename BufferStorageType> | ||
| 28 | class CachedBuffer final : public RasterizerCacheObject { | ||
| 29 | public: | ||
| 30 | explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr) | ||
| 31 | : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} | ||
| 32 | ~CachedBuffer() override = default; | ||
| 33 | |||
| 34 | VAddr GetCpuAddr() const override { | ||
| 35 | return cpu_addr; | ||
| 36 | } | ||
| 37 | |||
| 38 | std::size_t GetSizeInBytes() const override { | ||
| 39 | return size; | ||
| 40 | } | ||
| 41 | |||
| 42 | u8* GetWritableHostPtr() const { | ||
| 43 | return host_ptr; | ||
| 44 | } | ||
| 45 | |||
| 46 | std::size_t GetSize() const { | ||
| 47 | return size; | ||
| 48 | } | ||
| 49 | |||
| 50 | std::size_t GetCapacity() const { | ||
| 51 | return capacity; | ||
| 52 | } | ||
| 53 | |||
| 54 | bool IsInternalized() const { | ||
| 55 | return is_internal; | ||
| 56 | } | ||
| 57 | |||
| 58 | const BufferStorageType& GetBuffer() const { | ||
| 59 | return buffer; | ||
| 60 | } | ||
| 61 | |||
| 62 | void SetSize(std::size_t new_size) { | ||
| 63 | size = new_size; | ||
| 64 | } | ||
| 65 | |||
| 66 | void SetInternalState(bool is_internal_) { | ||
| 67 | is_internal = is_internal_; | ||
| 68 | } | ||
| 69 | |||
| 70 | BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) { | ||
| 71 | capacity = new_capacity; | ||
| 72 | std::swap(buffer, buffer_); | ||
| 73 | return buffer_; | ||
| 74 | } | ||
| 75 | |||
| 76 | private: | ||
| 77 | u8* host_ptr{}; | ||
| 78 | VAddr cpu_addr{}; | ||
| 79 | std::size_t size{}; | ||
| 80 | std::size_t capacity{}; | ||
| 81 | bool is_internal{}; | ||
| 82 | BufferStorageType buffer; | ||
| 83 | }; | ||
| 84 | |||
| 85 | template <typename BufferStorageType, typename BufferType, typename StreamBuffer> | ||
| 86 | class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> { | ||
| 87 | public: | ||
| 88 | using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>; | ||
| 89 | using BufferInfo = std::pair<const BufferType*, u64>; | ||
| 90 | |||
| 91 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | ||
| 92 | std::unique_ptr<StreamBuffer> stream_buffer) | ||
| 93 | : RasterizerCache<Buffer>{rasterizer}, system{system}, | ||
| 94 | stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{ | ||
| 95 | this->stream_buffer->GetHandle()} {} | ||
| 96 | ~BufferCache() = default; | ||
| 97 | |||
| 98 | void Unregister(const Buffer& entry) override { | ||
| 99 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 100 | if (entry->IsInternalized()) { | ||
| 101 | internalized_entries.erase(entry->GetCacheAddr()); | ||
| 102 | } | ||
| 103 | ReserveBuffer(entry); | ||
| 104 | RasterizerCache<Buffer>::Unregister(entry); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TickFrame() { | ||
| 108 | marked_for_destruction_index = | ||
| 109 | (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size(); | ||
| 110 | MarkedForDestruction().clear(); | ||
| 111 | } | ||
| 112 | |||
| 113 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||
| 114 | bool internalize = false, bool is_written = false) { | ||
| 115 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 116 | |||
| 117 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 118 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 119 | if (!host_ptr) { | ||
| 120 | return {GetEmptyBuffer(size), 0}; | ||
| 121 | } | ||
| 122 | const auto cache_addr = ToCacheAddr(host_ptr); | ||
| 123 | |||
| 124 | // Cache management is a big overhead, so only cache entries with a given size. | ||
| 125 | // TODO: Figure out which size is the best for given games. | ||
| 126 | constexpr std::size_t max_stream_size = 0x800; | ||
| 127 | if (!internalize && size < max_stream_size && | ||
| 128 | internalized_entries.find(cache_addr) == internalized_entries.end()) { | ||
| 129 | return StreamBufferUpload(host_ptr, size, alignment); | ||
| 130 | } | ||
| 131 | |||
| 132 | auto entry = RasterizerCache<Buffer>::TryGet(cache_addr); | ||
| 133 | if (!entry) { | ||
| 134 | return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); | ||
| 135 | } | ||
| 136 | |||
| 137 | if (entry->GetSize() < size) { | ||
| 138 | IncreaseBufferSize(entry, size); | ||
| 139 | } | ||
| 140 | if (is_written) { | ||
| 141 | entry->MarkAsModified(true, *this); | ||
| 142 | } | ||
| 143 | return {ToHandle(entry->GetBuffer()), 0}; | ||
| 144 | } | ||
| 145 | |||
| 146 | /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. | ||
| 147 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, | ||
| 148 | std::size_t alignment = 4) { | ||
| 149 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 150 | return StreamBufferUpload(raw_pointer, size, alignment); | ||
| 151 | } | ||
| 152 | |||
| 153 | void Map(std::size_t max_size) { | ||
| 154 | std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); | ||
| 155 | buffer_offset = buffer_offset_base; | ||
| 156 | } | ||
| 157 | |||
| 158 | /// Finishes the upload stream, returns true on bindings invalidation. | ||
| 159 | bool Unmap() { | ||
| 160 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); | ||
| 161 | return std::exchange(invalidated, false); | ||
| 162 | } | ||
| 163 | |||
| 164 | virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; | ||
| 165 | |||
| 166 | protected: | ||
| 167 | void FlushObjectInner(const Buffer& entry) override { | ||
| 168 | DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); | ||
| 169 | } | ||
| 170 | |||
| 171 | virtual BufferStorageType CreateBuffer(std::size_t size) = 0; | ||
| 172 | |||
| 173 | virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; | ||
| 174 | |||
| 175 | virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, | ||
| 176 | std::size_t size, const u8* data) = 0; | ||
| 177 | |||
| 178 | virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset, | ||
| 179 | std::size_t size, u8* data) = 0; | ||
| 180 | |||
| 181 | virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst, | ||
| 182 | std::size_t src_offset, std::size_t dst_offset, | ||
| 183 | std::size_t size) = 0; | ||
| 184 | |||
| 185 | private: | ||
| 186 | BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, | ||
| 187 | std::size_t alignment) { | ||
| 188 | AlignBuffer(alignment); | ||
| 189 | const std::size_t uploaded_offset = buffer_offset; | ||
| 190 | std::memcpy(buffer_ptr, raw_pointer, size); | ||
| 191 | |||
| 192 | buffer_ptr += size; | ||
| 193 | buffer_offset += size; | ||
| 194 | return {&stream_buffer_handle, uploaded_offset}; | ||
| 195 | } | ||
| 196 | |||
| 197 | BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, | ||
| 198 | bool internalize, bool is_written) { | ||
| 199 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||
| 200 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | ||
| 201 | ASSERT(cpu_addr); | ||
| 202 | |||
| 203 | auto entry = GetUncachedBuffer(*cpu_addr, host_ptr); | ||
| 204 | entry->SetSize(size); | ||
| 205 | entry->SetInternalState(internalize); | ||
| 206 | RasterizerCache<Buffer>::Register(entry); | ||
| 207 | |||
| 208 | if (internalize) { | ||
| 209 | internalized_entries.emplace(ToCacheAddr(host_ptr)); | ||
| 210 | } | ||
| 211 | if (is_written) { | ||
| 212 | entry->MarkAsModified(true, *this); | ||
| 213 | } | ||
| 214 | |||
| 215 | if (entry->GetCapacity() < size) { | ||
| 216 | MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size)); | ||
| 217 | } | ||
| 218 | |||
| 219 | UploadBufferData(entry->GetBuffer(), 0, size, host_ptr); | ||
| 220 | return {ToHandle(entry->GetBuffer()), 0}; | ||
| 221 | } | ||
| 222 | |||
| 223 | void IncreaseBufferSize(Buffer& entry, std::size_t new_size) { | ||
| 224 | const std::size_t old_size = entry->GetSize(); | ||
| 225 | if (entry->GetCapacity() < new_size) { | ||
| 226 | const auto& old_buffer = entry->GetBuffer(); | ||
| 227 | auto new_buffer = CreateBuffer(new_size); | ||
| 228 | |||
| 229 | // Copy bits from the old buffer to the new buffer. | ||
| 230 | CopyBufferData(old_buffer, new_buffer, 0, 0, old_size); | ||
| 231 | MarkedForDestruction().push_back( | ||
| 232 | entry->ExchangeBuffer(std::move(new_buffer), new_size)); | ||
| 233 | |||
| 234 | // This buffer could have been used | ||
| 235 | invalidated = true; | ||
| 236 | } | ||
| 237 | // Upload the new bits. | ||
| 238 | const std::size_t size_diff = new_size - old_size; | ||
| 239 | UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); | ||
| 240 | |||
| 241 | // Update entry's size in the object and in the cache. | ||
| 242 | Unregister(entry); | ||
| 243 | |||
| 244 | entry->SetSize(new_size); | ||
| 245 | RasterizerCache<Buffer>::Register(entry); | ||
| 246 | } | ||
| 247 | |||
| 248 | Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { | ||
| 249 | if (auto entry = TryGetReservedBuffer(host_ptr)) { | ||
| 250 | return entry; | ||
| 251 | } | ||
| 252 | return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr); | ||
| 253 | } | ||
| 254 | |||
| 255 | Buffer TryGetReservedBuffer(u8* host_ptr) { | ||
| 256 | const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); | ||
| 257 | if (it == buffer_reserve.end()) { | ||
| 258 | return {}; | ||
| 259 | } | ||
| 260 | auto& reserve = it->second; | ||
| 261 | auto entry = reserve.back(); | ||
| 262 | reserve.pop_back(); | ||
| 263 | return entry; | ||
| 264 | } | ||
| 265 | |||
| 266 | void ReserveBuffer(Buffer entry) { | ||
| 267 | buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); | ||
| 268 | } | ||
| 269 | |||
| 270 | void AlignBuffer(std::size_t alignment) { | ||
| 271 | // Align the offset, not the mapped pointer | ||
| 272 | const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); | ||
| 273 | buffer_ptr += offset_aligned - buffer_offset; | ||
| 274 | buffer_offset = offset_aligned; | ||
| 275 | } | ||
| 276 | |||
| 277 | std::vector<BufferStorageType>& MarkedForDestruction() { | ||
| 278 | return marked_for_destruction_ring_buffer[marked_for_destruction_index]; | ||
| 279 | } | ||
| 280 | |||
| 281 | Core::System& system; | ||
| 282 | |||
| 283 | std::unique_ptr<StreamBuffer> stream_buffer; | ||
| 284 | BufferType stream_buffer_handle{}; | ||
| 285 | |||
| 286 | bool invalidated = false; | ||
| 287 | |||
| 288 | u8* buffer_ptr = nullptr; | ||
| 289 | u64 buffer_offset = 0; | ||
| 290 | u64 buffer_offset_base = 0; | ||
| 291 | |||
| 292 | std::size_t marked_for_destruction_index = 0; | ||
| 293 | std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer; | ||
| 294 | |||
| 295 | std::unordered_set<CacheAddr> internalized_entries; | ||
| 296 | std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve; | ||
| 297 | }; | ||
| 298 | |||
| 299 | } // namespace VideoCommon | ||
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 13e314944..8d15c8a48 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -67,6 +67,7 @@ public: | |||
| 67 | static constexpr std::size_t MaxShaderStage = 5; | 67 | static constexpr std::size_t MaxShaderStage = 5; |
| 68 | // Maximum number of const buffers per shader stage. | 68 | // Maximum number of const buffers per shader stage. |
| 69 | static constexpr std::size_t MaxConstBuffers = 18; | 69 | static constexpr std::size_t MaxConstBuffers = 18; |
| 70 | static constexpr std::size_t MaxConstBufferSize = 0x10000; | ||
| 70 | 71 | ||
| 71 | enum class QueryMode : u32 { | 72 | enum class QueryMode : u32 { |
| 72 | Write = 0, | 73 | Write = 0, |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5ee4f8e8e..2b7367568 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -47,6 +47,9 @@ public: | |||
| 47 | /// and invalidated | 47 | /// and invalidated |
| 48 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 48 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 49 | 49 | ||
| 50 | /// Notify rasterizer that a frame is about to finish | ||
| 51 | virtual void TickFrame() = 0; | ||
| 52 | |||
| 50 | /// Attempt to use a faster method to perform a surface copy | 53 | /// Attempt to use a faster method to perform a surface copy |
| 51 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 54 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 52 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 55 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2b9bd142e..2a9b523f5 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -2,103 +2,57 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | ||
| 6 | #include <memory> | 5 | #include <memory> |
| 7 | 6 | ||
| 8 | #include "common/alignment.h" | 7 | #include <glad/glad.h> |
| 9 | #include "core/core.h" | 8 | |
| 10 | #include "video_core/memory_manager.h" | 9 | #include "common/assert.h" |
| 11 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 10 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 12 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 13 | 13 | ||
| 14 | namespace OpenGL { | 14 | namespace OpenGL { |
| 15 | 15 | ||
| 16 | CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | 16 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 17 | std::size_t alignment, u8* host_ptr) | 17 | std::size_t stream_size) |
| 18 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, | 18 | : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{ |
| 19 | alignment{alignment} {} | 19 | rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} |
| 20 | |||
| 21 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) | ||
| 22 | : RasterizerCache{rasterizer}, stream_buffer(size, true) {} | ||
| 23 | |||
| 24 | GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment, | ||
| 25 | bool cache) { | ||
| 26 | std::lock_guard lock{mutex}; | ||
| 27 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||
| 28 | |||
| 29 | // Cache management is a big overhead, so only cache entries with a given size. | ||
| 30 | // TODO: Figure out which size is the best for given games. | ||
| 31 | cache &= size >= 2048; | ||
| 32 | |||
| 33 | const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; | ||
| 34 | if (cache) { | ||
| 35 | auto entry = TryGet(host_ptr); | ||
| 36 | if (entry) { | ||
| 37 | if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { | ||
| 38 | return entry->GetOffset(); | ||
| 39 | } | ||
| 40 | Unregister(entry); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | 20 | ||
| 44 | AlignBuffer(alignment); | 21 | OGLBufferCache::~OGLBufferCache() = default; |
| 45 | const GLintptr uploaded_offset = buffer_offset; | ||
| 46 | 22 | ||
| 47 | if (!host_ptr) { | 23 | OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) { |
| 48 | return uploaded_offset; | 24 | OGLBuffer buffer; |
| 49 | } | 25 | buffer.Create(); |
| 50 | 26 | glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | |
| 51 | std::memcpy(buffer_ptr, host_ptr, size); | 27 | return buffer; |
| 52 | buffer_ptr += size; | ||
| 53 | buffer_offset += size; | ||
| 54 | |||
| 55 | if (cache) { | ||
| 56 | auto entry = std::make_shared<CachedBufferEntry>( | ||
| 57 | *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr); | ||
| 58 | Register(entry); | ||
| 59 | } | ||
| 60 | |||
| 61 | return uploaded_offset; | ||
| 62 | } | 28 | } |
| 63 | 29 | ||
| 64 | GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, | 30 | const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) { |
| 65 | std::size_t alignment) { | 31 | return &buffer.handle; |
| 66 | std::lock_guard lock{mutex}; | ||
| 67 | AlignBuffer(alignment); | ||
| 68 | std::memcpy(buffer_ptr, raw_pointer, size); | ||
| 69 | const GLintptr uploaded_offset = buffer_offset; | ||
| 70 | |||
| 71 | buffer_ptr += size; | ||
| 72 | buffer_offset += size; | ||
| 73 | return uploaded_offset; | ||
| 74 | } | 32 | } |
| 75 | 33 | ||
| 76 | bool OGLBufferCache::Map(std::size_t max_size) { | 34 | const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { |
| 77 | bool invalidate; | 35 | static const GLuint null_buffer = 0; |
| 78 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = | 36 | return &null_buffer; |
| 79 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | ||
| 80 | buffer_offset = buffer_offset_base; | ||
| 81 | |||
| 82 | if (invalidate) { | ||
| 83 | InvalidateAll(); | ||
| 84 | } | ||
| 85 | return invalidate; | ||
| 86 | } | 37 | } |
| 87 | 38 | ||
| 88 | void OGLBufferCache::Unmap() { | 39 | void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, |
| 89 | stream_buffer.Unmap(buffer_offset - buffer_offset_base); | 40 | const u8* data) { |
| 41 | glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||
| 42 | static_cast<GLsizeiptr>(size), data); | ||
| 90 | } | 43 | } |
| 91 | 44 | ||
| 92 | GLuint OGLBufferCache::GetHandle() const { | 45 | void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, |
| 93 | return stream_buffer.GetHandle(); | 46 | std::size_t size, u8* data) { |
| 47 | glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||
| 48 | static_cast<GLsizeiptr>(size), data); | ||
| 94 | } | 49 | } |
| 95 | 50 | ||
| 96 | void OGLBufferCache::AlignBuffer(std::size_t alignment) { | 51 | void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, |
| 97 | // Align the offset, not the mapped pointer | 52 | std::size_t src_offset, std::size_t dst_offset, |
| 98 | const GLintptr offset_aligned = | 53 | std::size_t size) { |
| 99 | static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); | 54 | glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset), |
| 100 | buffer_ptr += offset_aligned - buffer_offset; | 55 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); |
| 101 | buffer_offset = offset_aligned; | ||
| 102 | } | 56 | } |
| 103 | 57 | ||
| 104 | } // namespace OpenGL | 58 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f2347581b..8c8ac4038 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -4,80 +4,44 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | ||
| 8 | #include <memory> | 7 | #include <memory> |
| 9 | #include <tuple> | ||
| 10 | 8 | ||
| 11 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache.h" | ||
| 12 | #include "video_core/rasterizer_cache.h" | 11 | #include "video_core/rasterizer_cache.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 15 | 14 | ||
| 15 | namespace Core { | ||
| 16 | class System; | ||
| 17 | } | ||
| 18 | |||
| 16 | namespace OpenGL { | 19 | namespace OpenGL { |
| 17 | 20 | ||
| 21 | class OGLStreamBuffer; | ||
| 18 | class RasterizerOpenGL; | 22 | class RasterizerOpenGL; |
| 19 | 23 | ||
| 20 | class CachedBufferEntry final : public RasterizerCacheObject { | 24 | class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> { |
| 21 | public: | ||
| 22 | explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | ||
| 23 | std::size_t alignment, u8* host_ptr); | ||
| 24 | |||
| 25 | VAddr GetCpuAddr() const override { | ||
| 26 | return cpu_addr; | ||
| 27 | } | ||
| 28 | |||
| 29 | std::size_t GetSizeInBytes() const override { | ||
| 30 | return size; | ||
| 31 | } | ||
| 32 | |||
| 33 | std::size_t GetSize() const { | ||
| 34 | return size; | ||
| 35 | } | ||
| 36 | |||
| 37 | GLintptr GetOffset() const { | ||
| 38 | return offset; | ||
| 39 | } | ||
| 40 | |||
| 41 | std::size_t GetAlignment() const { | ||
| 42 | return alignment; | ||
| 43 | } | ||
| 44 | |||
| 45 | private: | ||
| 46 | VAddr cpu_addr{}; | ||
| 47 | std::size_t size{}; | ||
| 48 | GLintptr offset{}; | ||
| 49 | std::size_t alignment{}; | ||
| 50 | }; | ||
| 51 | |||
| 52 | class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | ||
| 53 | public: | 25 | public: |
| 54 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); | 26 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 55 | 27 | std::size_t stream_size); | |
| 56 | /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been | 28 | ~OGLBufferCache(); |
| 57 | /// allocated. | ||
| 58 | GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||
| 59 | bool cache = true); | ||
| 60 | 29 | ||
| 61 | /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. | 30 | const GLuint* GetEmptyBuffer(std::size_t) override; |
| 62 | GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); | ||
| 63 | |||
| 64 | bool Map(std::size_t max_size); | ||
| 65 | void Unmap(); | ||
| 66 | |||
| 67 | GLuint GetHandle() const; | ||
| 68 | 31 | ||
| 69 | protected: | 32 | protected: |
| 70 | void AlignBuffer(std::size_t alignment); | 33 | OGLBuffer CreateBuffer(std::size_t size) override; |
| 34 | |||
| 35 | const GLuint* ToHandle(const OGLBuffer& buffer) override; | ||
| 71 | 36 | ||
| 72 | // We do not have to flush this cache as things in it are never modified by us. | 37 | void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, |
| 73 | void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} | 38 | const u8* data) override; |
| 74 | 39 | ||
| 75 | private: | 40 | void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, |
| 76 | OGLStreamBuffer stream_buffer; | 41 | u8* data) override; |
| 77 | 42 | ||
| 78 | u8* buffer_ptr = nullptr; | 43 | void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset, |
| 79 | GLintptr buffer_offset = 0; | 44 | std::size_t dst_offset, std::size_t size) override; |
| 80 | GLintptr buffer_offset_base = 0; | ||
| 81 | }; | 45 | }; |
| 82 | 46 | ||
| 83 | } // namespace OpenGL | 47 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a48e14d2e..6238ddaaa 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -24,6 +24,7 @@ T GetInteger(GLenum pname) { | |||
| 24 | 24 | ||
| 25 | Device::Device() { | 25 | Device::Device() { |
| 26 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 26 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 27 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | ||
| 27 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 28 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| 28 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 29 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 29 | has_variable_aoffi = TestVariableAoffi(); | 30 | has_variable_aoffi = TestVariableAoffi(); |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8c8c93760..939edb440 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -18,6 +18,10 @@ public: | |||
| 18 | return uniform_buffer_alignment; | 18 | return uniform_buffer_alignment; |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | std::size_t GetShaderStorageBufferAlignment() const { | ||
| 22 | return shader_storage_alignment; | ||
| 23 | } | ||
| 24 | |||
| 21 | u32 GetMaxVertexAttributes() const { | 25 | u32 GetMaxVertexAttributes() const { |
| 22 | return max_vertex_attributes; | 26 | return max_vertex_attributes; |
| 23 | } | 27 | } |
| @@ -39,6 +43,7 @@ private: | |||
| 39 | static bool TestComponentIndexingBug(); | 43 | static bool TestComponentIndexingBug(); |
| 40 | 44 | ||
| 41 | std::size_t uniform_buffer_alignment{}; | 45 | std::size_t uniform_buffer_alignment{}; |
| 46 | std::size_t shader_storage_alignment{}; | ||
| 42 | u32 max_vertex_attributes{}; | 47 | u32 max_vertex_attributes{}; |
| 43 | u32 max_varyings{}; | 48 | u32 max_varyings{}; |
| 44 | bool has_variable_aoffi{}; | 49 | bool has_variable_aoffi{}; |
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp deleted file mode 100644 index d5e385151..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ /dev/null | |||
| @@ -1,102 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <glad/glad.h> | ||
| 6 | |||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "core/core.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_global_cache.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 13 | #include "video_core/renderer_opengl/utils.h" | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) | ||
| 18 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, | ||
| 19 | max_size{max_size} { | ||
| 20 | buffer.Create(); | ||
| 21 | LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); | ||
| 22 | } | ||
| 23 | |||
| 24 | CachedGlobalRegion::~CachedGlobalRegion() = default; | ||
| 25 | |||
| 26 | void CachedGlobalRegion::Reload(u32 size_) { | ||
| 27 | size = size_; | ||
| 28 | if (size > max_size) { | ||
| 29 | size = max_size; | ||
| 30 | LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, | ||
| 31 | max_size); | ||
| 32 | } | ||
| 33 | glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); | ||
| 34 | } | ||
| 35 | |||
| 36 | void CachedGlobalRegion::Flush() { | ||
| 37 | LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); | ||
| 38 | glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); | ||
| 39 | } | ||
| 40 | |||
| 41 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { | ||
| 42 | const auto search{reserve.find(addr)}; | ||
| 43 | if (search == reserve.end()) { | ||
| 44 | return {}; | ||
| 45 | } | ||
| 46 | return search->second; | ||
| 47 | } | ||
| 48 | |||
| 49 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, | ||
| 50 | u32 size) { | ||
| 51 | GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; | ||
| 52 | if (!region) { | ||
| 53 | // No reserved surface available, create a new one and reserve it | ||
| 54 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||
| 55 | const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; | ||
| 56 | ASSERT(cpu_addr); | ||
| 57 | |||
| 58 | region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); | ||
| 59 | ReserveGlobalRegion(region); | ||
| 60 | } | ||
| 61 | region->Reload(size); | ||
| 62 | return region; | ||
| 63 | } | ||
| 64 | |||
| 65 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { | ||
| 66 | reserve.insert_or_assign(region->GetCacheAddr(), std::move(region)); | ||
| 67 | } | ||
| 68 | |||
| 69 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | ||
| 70 | : RasterizerCache{rasterizer} { | ||
| 71 | GLint max_ssbo_size_; | ||
| 72 | glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); | ||
| 73 | max_ssbo_size = static_cast<u32>(max_ssbo_size_); | ||
| 74 | } | ||
| 75 | |||
| 76 | GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | ||
| 77 | const GLShader::GlobalMemoryEntry& global_region, | ||
| 78 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { | ||
| 79 | std::lock_guard lock{mutex}; | ||
| 80 | |||
| 81 | auto& gpu{Core::System::GetInstance().GPU()}; | ||
| 82 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 83 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; | ||
| 84 | const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + | ||
| 85 | global_region.GetCbufOffset()}; | ||
| 86 | const auto actual_addr{memory_manager.Read<u64>(addr)}; | ||
| 87 | const auto size{memory_manager.Read<u32>(addr + 8)}; | ||
| 88 | |||
| 89 | // Look up global region in the cache based on address | ||
| 90 | const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; | ||
| 91 | GlobalRegion region{TryGet(host_ptr)}; | ||
| 92 | |||
| 93 | if (!region) { | ||
| 94 | // No global region found - create a new one | ||
| 95 | region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); | ||
| 96 | Register(region); | ||
| 97 | } | ||
| 98 | |||
| 99 | return region; | ||
| 100 | } | ||
| 101 | |||
| 102 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h deleted file mode 100644 index 2d467a240..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ /dev/null | |||
| @@ -1,82 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include <glad/glad.h> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/rasterizer_cache.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | namespace GLShader { | ||
| 21 | class GlobalMemoryEntry; | ||
| 22 | } | ||
| 23 | |||
| 24 | class RasterizerOpenGL; | ||
| 25 | class CachedGlobalRegion; | ||
| 26 | using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; | ||
| 27 | |||
| 28 | class CachedGlobalRegion final : public RasterizerCacheObject { | ||
| 29 | public: | ||
| 30 | explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); | ||
| 31 | ~CachedGlobalRegion(); | ||
| 32 | |||
| 33 | VAddr GetCpuAddr() const override { | ||
| 34 | return cpu_addr; | ||
| 35 | } | ||
| 36 | |||
| 37 | std::size_t GetSizeInBytes() const override { | ||
| 38 | return size; | ||
| 39 | } | ||
| 40 | |||
| 41 | /// Gets the GL program handle for the buffer | ||
| 42 | GLuint GetBufferHandle() const { | ||
| 43 | return buffer.handle; | ||
| 44 | } | ||
| 45 | |||
| 46 | /// Reloads the global region from guest memory | ||
| 47 | void Reload(u32 size_); | ||
| 48 | |||
| 49 | void Flush(); | ||
| 50 | |||
| 51 | private: | ||
| 52 | VAddr cpu_addr{}; | ||
| 53 | u8* host_ptr{}; | ||
| 54 | u32 size{}; | ||
| 55 | u32 max_size{}; | ||
| 56 | |||
| 57 | OGLBuffer buffer; | ||
| 58 | }; | ||
| 59 | |||
| 60 | class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { | ||
| 61 | public: | ||
| 62 | explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); | ||
| 63 | |||
| 64 | /// Gets the current specified shader stage program | ||
| 65 | GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, | ||
| 66 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); | ||
| 67 | |||
| 68 | protected: | ||
| 69 | void FlushObjectInner(const GlobalRegion& object) override { | ||
| 70 | object->Flush(); | ||
| 71 | } | ||
| 72 | |||
| 73 | private: | ||
| 74 | GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; | ||
| 75 | GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); | ||
| 76 | void ReserveGlobalRegion(GlobalRegion region); | ||
| 77 | |||
| 78 | std::unordered_map<CacheAddr, GlobalRegion> reserve; | ||
| 79 | u32 max_ssbo_size{}; | ||
| 80 | }; | ||
| 81 | |||
| 82 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f45a3c5ef..0bb5c068c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "core/hle/kernel/process.h" | 20 | #include "core/hle/kernel/process.h" |
| 21 | #include "core/settings.h" | 21 | #include "core/settings.h" |
| 22 | #include "video_core/engines/maxwell_3d.h" | 22 | #include "video_core/engines/maxwell_3d.h" |
| 23 | #include "video_core/memory_manager.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 25 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 26 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| @@ -80,11 +81,25 @@ struct DrawParameters { | |||
| 80 | } | 81 | } |
| 81 | }; | 82 | }; |
| 82 | 83 | ||
| 84 | static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 85 | const GLShader::ConstBufferEntry& entry) { | ||
| 86 | if (!entry.IsIndirect()) { | ||
| 87 | return entry.GetSize(); | ||
| 88 | } | ||
| 89 | |||
| 90 | if (buffer.size > Maxwell::MaxConstBufferSize) { | ||
| 91 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, | ||
| 92 | Maxwell::MaxConstBufferSize); | ||
| 93 | return Maxwell::MaxConstBufferSize; | ||
| 94 | } | ||
| 95 | |||
| 96 | return buffer.size; | ||
| 97 | } | ||
| 98 | |||
| 83 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 99 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 84 | ScreenInfo& info) | 100 | ScreenInfo& info) |
| 85 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, | 101 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, |
| 86 | global_cache{*this}, system{system}, screen_info{info}, | 102 | system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} { |
| 87 | buffer_cache(*this, STREAM_BUFFER_SIZE) { | ||
| 88 | OpenGLState::ApplyDefaultState(); | 103 | OpenGLState::ApplyDefaultState(); |
| 89 | 104 | ||
| 90 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 105 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| @@ -129,8 +144,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 129 | state.draw.vertex_array = vao; | 144 | state.draw.vertex_array = vao; |
| 130 | state.ApplyVertexArrayState(); | 145 | state.ApplyVertexArrayState(); |
| 131 | 146 | ||
| 132 | glVertexArrayElementBuffer(vao, buffer_cache.GetHandle()); | ||
| 133 | |||
| 134 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. | 147 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. |
| 135 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually | 148 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually |
| 136 | // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 | 149 | // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 |
| @@ -197,11 +210,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 197 | 210 | ||
| 198 | ASSERT(end > start); | 211 | ASSERT(end > start); |
| 199 | const u64 size = end - start + 1; | 212 | const u64 size = end - start + 1; |
| 200 | const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); | 213 | const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); |
| 201 | 214 | ||
| 202 | // Bind the vertex array to the buffer at the current offset. | 215 | // Bind the vertex array to the buffer at the current offset. |
| 203 | glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset, | 216 | vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, |
| 204 | vertex_array.stride); | 217 | vertex_array.stride); |
| 205 | 218 | ||
| 206 | if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { | 219 | if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { |
| 207 | // Enable vertex buffer instancing with the specified divisor. | 220 | // Enable vertex buffer instancing with the specified divisor. |
| @@ -215,7 +228,19 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 215 | gpu.dirty_flags.vertex_array.reset(); | 228 | gpu.dirty_flags.vertex_array.reset(); |
| 216 | } | 229 | } |
| 217 | 230 | ||
| 218 | DrawParameters RasterizerOpenGL::SetupDraw() { | 231 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { |
| 232 | if (accelerate_draw != AccelDraw::Indexed) { | ||
| 233 | return 0; | ||
| 234 | } | ||
| 235 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 236 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 237 | const std::size_t size = CalculateIndexBufferSize(); | ||
| 238 | const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); | ||
| 239 | vertex_array_pushbuffer.SetIndexBuffer(buffer); | ||
| 240 | return offset; | ||
| 241 | } | ||
| 242 | |||
| 243 | DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) { | ||
| 219 | const auto& gpu = system.GPU().Maxwell3D(); | 244 | const auto& gpu = system.GPU().Maxwell3D(); |
| 220 | const auto& regs = gpu.regs; | 245 | const auto& regs = gpu.regs; |
| 221 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; | 246 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; |
| @@ -227,11 +252,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() { | |||
| 227 | params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); | 252 | params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); |
| 228 | 253 | ||
| 229 | if (is_indexed) { | 254 | if (is_indexed) { |
| 230 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 231 | params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); | 255 | params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); |
| 232 | params.count = regs.index_array.count; | 256 | params.count = regs.index_array.count; |
| 233 | params.index_buffer_offset = | 257 | params.index_buffer_offset = index_buffer_offset; |
| 234 | buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); | ||
| 235 | params.base_vertex = static_cast<GLint>(regs.vb_element_base); | 258 | params.base_vertex = static_cast<GLint>(regs.vb_element_base); |
| 236 | } else { | 259 | } else { |
| 237 | params.count = regs.vertex_buffer.count; | 260 | params.count = regs.vertex_buffer.count; |
| @@ -247,10 +270,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 247 | BaseBindings base_bindings; | 270 | BaseBindings base_bindings; |
| 248 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 271 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 249 | 272 | ||
| 250 | // Prepare packed bindings | ||
| 251 | bind_ubo_pushbuffer.Setup(base_bindings.cbuf); | ||
| 252 | bind_ssbo_pushbuffer.Setup(base_bindings.gmem); | ||
| 253 | |||
| 254 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 273 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 255 | const auto& shader_config = gpu.regs.shader_config[index]; | 274 | const auto& shader_config = gpu.regs.shader_config[index]; |
| 256 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | 275 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; |
| @@ -271,12 +290,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 271 | 290 | ||
| 272 | GLShader::MaxwellUniformData ubo{}; | 291 | GLShader::MaxwellUniformData ubo{}; |
| 273 | ubo.SetFromRegs(gpu, stage); | 292 | ubo.SetFromRegs(gpu, stage); |
| 274 | const GLintptr offset = | 293 | const auto [buffer, offset] = |
| 275 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | 294 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); |
| 276 | 295 | ||
| 277 | // Bind the emulation info buffer | 296 | // Bind the emulation info buffer |
| 278 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, | 297 | bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo))); |
| 279 | static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 280 | 298 | ||
| 281 | Shader shader{shader_cache.GetStageProgram(program)}; | 299 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 282 | 300 | ||
| @@ -321,9 +339,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 321 | base_bindings = next_bindings; | 339 | base_bindings = next_bindings; |
| 322 | } | 340 | } |
| 323 | 341 | ||
| 324 | bind_ubo_pushbuffer.Bind(); | ||
| 325 | bind_ssbo_pushbuffer.Bind(); | ||
| 326 | |||
| 327 | SyncClipEnabled(clip_distances); | 342 | SyncClipEnabled(clip_distances); |
| 328 | 343 | ||
| 329 | gpu.dirty_flags.shaders = false; | 344 | gpu.dirty_flags.shaders = false; |
| @@ -634,26 +649,46 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 634 | Maxwell::MaxShaderStage; | 649 | Maxwell::MaxShaderStage; |
| 635 | 650 | ||
| 636 | // Add space for at least 18 constant buffers | 651 | // Add space for at least 18 constant buffers |
| 637 | buffer_size += | 652 | buffer_size += Maxwell::MaxConstBuffers * |
| 638 | Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | 653 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 639 | 654 | ||
| 640 | const bool invalidate = buffer_cache.Map(buffer_size); | 655 | // Prepare the vertex array. |
| 641 | if (invalidate) { | 656 | buffer_cache.Map(buffer_size); |
| 642 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 643 | gpu.dirty_flags.vertex_array.set(); | ||
| 644 | } | ||
| 645 | 657 | ||
| 658 | // Prepare vertex array format. | ||
| 646 | const GLuint vao = SetupVertexFormat(); | 659 | const GLuint vao = SetupVertexFormat(); |
| 660 | vertex_array_pushbuffer.Setup(vao); | ||
| 661 | |||
| 662 | // Upload vertex and index data. | ||
| 647 | SetupVertexBuffer(vao); | 663 | SetupVertexBuffer(vao); |
| 664 | const GLintptr index_buffer_offset = SetupIndexBuffer(); | ||
| 648 | 665 | ||
| 649 | DrawParameters params = SetupDraw(); | 666 | // Setup draw parameters. It will automatically choose what glDraw* method to use. |
| 667 | const DrawParameters params = SetupDraw(index_buffer_offset); | ||
| 668 | |||
| 669 | // Prepare packed bindings. | ||
| 670 | bind_ubo_pushbuffer.Setup(0); | ||
| 671 | bind_ssbo_pushbuffer.Setup(0); | ||
| 672 | |||
| 673 | // Setup shaders and their used resources. | ||
| 650 | texture_cache.GuardSamplers(true); | 674 | texture_cache.GuardSamplers(true); |
| 651 | SetupShaders(params.primitive_mode); | 675 | SetupShaders(params.primitive_mode); |
| 652 | texture_cache.GuardSamplers(false); | 676 | texture_cache.GuardSamplers(false); |
| 653 | 677 | ||
| 654 | ConfigureFramebuffers(state); | 678 | ConfigureFramebuffers(state); |
| 655 | 679 | ||
| 656 | buffer_cache.Unmap(); | 680 | // Signal the buffer cache that we are not going to upload more things. |
| 681 | const bool invalidate = buffer_cache.Unmap(); | ||
| 682 | |||
| 683 | // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. | ||
| 684 | vertex_array_pushbuffer.Bind(); | ||
| 685 | bind_ubo_pushbuffer.Bind(); | ||
| 686 | bind_ssbo_pushbuffer.Bind(); | ||
| 687 | |||
| 688 | if (invalidate) { | ||
| 689 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 690 | gpu.dirty_flags.vertex_array.set(); | ||
| 691 | } | ||
| 657 | 692 | ||
| 658 | shader_program_manager->ApplyTo(state); | 693 | shader_program_manager->ApplyTo(state); |
| 659 | state.Apply(); | 694 | state.Apply(); |
| @@ -675,7 +710,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | |||
| 675 | return; | 710 | return; |
| 676 | } | 711 | } |
| 677 | texture_cache.FlushRegion(addr, size); | 712 | texture_cache.FlushRegion(addr, size); |
| 678 | global_cache.FlushRegion(addr, size); | 713 | buffer_cache.FlushRegion(addr, size); |
| 679 | } | 714 | } |
| 680 | 715 | ||
| 681 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | 716 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { |
| @@ -685,7 +720,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | |||
| 685 | } | 720 | } |
| 686 | texture_cache.InvalidateRegion(addr, size); | 721 | texture_cache.InvalidateRegion(addr, size); |
| 687 | shader_cache.InvalidateRegion(addr, size); | 722 | shader_cache.InvalidateRegion(addr, size); |
| 688 | global_cache.InvalidateRegion(addr, size); | ||
| 689 | buffer_cache.InvalidateRegion(addr, size); | 723 | buffer_cache.InvalidateRegion(addr, size); |
| 690 | } | 724 | } |
| 691 | 725 | ||
| @@ -696,6 +730,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 696 | InvalidateRegion(addr, size); | 730 | InvalidateRegion(addr, size); |
| 697 | } | 731 | } |
| 698 | 732 | ||
| 733 | void RasterizerOpenGL::TickFrame() { | ||
| 734 | buffer_cache.TickFrame(); | ||
| 735 | } | ||
| 736 | |||
| 699 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 737 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 700 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 738 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 701 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 739 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| @@ -739,11 +777,9 @@ void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::Sh | |||
| 739 | MICROPROFILE_SCOPE(OpenGL_UBO); | 777 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 740 | const auto stage_index = static_cast<std::size_t>(stage); | 778 | const auto stage_index = static_cast<std::size_t>(stage); |
| 741 | const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; | 779 | const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; |
| 742 | const auto& entries = shader->GetShaderEntries().const_buffers; | ||
| 743 | 780 | ||
| 744 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage | 781 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage |
| 745 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 782 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { |
| 746 | const auto& entry = entries[bindpoint]; | ||
| 747 | SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); | 783 | SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); |
| 748 | } | 784 | } |
| 749 | } | 785 | } |
| @@ -752,46 +788,34 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | |||
| 752 | const GLShader::ConstBufferEntry& entry) { | 788 | const GLShader::ConstBufferEntry& entry) { |
| 753 | if (!buffer.enabled) { | 789 | if (!buffer.enabled) { |
| 754 | // Set values to zero to unbind buffers | 790 | // Set values to zero to unbind buffers |
| 755 | bind_ubo_pushbuffer.Push(0, 0, 0); | 791 | bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); |
| 756 | return; | 792 | return; |
| 757 | } | 793 | } |
| 758 | 794 | ||
| 759 | std::size_t size; | ||
| 760 | if (entry.IsIndirect()) { | ||
| 761 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 762 | size = buffer.size; | ||
| 763 | |||
| 764 | if (size > MaxConstbufferSize) { | ||
| 765 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, | ||
| 766 | MaxConstbufferSize); | ||
| 767 | size = MaxConstbufferSize; | ||
| 768 | } | ||
| 769 | } else { | ||
| 770 | // Buffer is accessed directly, upload just what we use | ||
| 771 | size = entry.GetSize(); | ||
| 772 | } | ||
| 773 | |||
| 774 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 | 795 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 |
| 775 | // UBO alignment requirements. | 796 | // UBO alignment requirements. |
| 776 | size = Common::AlignUp(size, sizeof(GLvec4)); | 797 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); |
| 777 | ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); | ||
| 778 | 798 | ||
| 779 | const std::size_t alignment = device.GetUniformBufferAlignment(); | 799 | const auto alignment = device.GetUniformBufferAlignment(); |
| 780 | const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); | 800 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); |
| 781 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); | 801 | bind_ubo_pushbuffer.Push(cbuf, offset, size); |
| 782 | } | 802 | } |
| 783 | 803 | ||
| 784 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 804 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 785 | const Shader& shader) { | 805 | const Shader& shader) { |
| 786 | const auto& entries = shader->GetShaderEntries().global_memory_entries; | 806 | auto& gpu{system.GPU()}; |
| 787 | for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 807 | auto& memory_manager{gpu.MemoryManager()}; |
| 788 | const auto& entry{entries[bindpoint]}; | 808 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; |
| 789 | const auto& region{global_cache.GetGlobalRegion(entry, stage)}; | 809 | const auto alignment{device.GetShaderStorageBufferAlignment()}; |
| 790 | if (entry.IsWritten()) { | 810 | |
| 791 | region->MarkAsModified(true, global_cache); | 811 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { |
| 792 | } | 812 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 793 | bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, | 813 | const auto actual_addr{memory_manager.Read<u64>(addr)}; |
| 794 | static_cast<GLsizeiptr>(region->GetSizeInBytes())); | 814 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| 815 | |||
| 816 | const auto [ssbo, buffer_offset] = | ||
| 817 | buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten()); | ||
| 818 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | ||
| 795 | } | 819 | } |
| 796 | } | 820 | } |
| 797 | 821 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d238c1257..40b571d58 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -24,7 +24,6 @@ | |||
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_device.h" | 25 | #include "video_core/renderer_opengl/gl_device.h" |
| 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_global_cache.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 27 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 29 | #include "video_core/renderer_opengl/gl_sampler_cache.h" | 28 | #include "video_core/renderer_opengl/gl_sampler_cache.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 29 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| @@ -63,6 +62,7 @@ public: | |||
| 63 | void FlushRegion(CacheAddr addr, u64 size) override; | 62 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 64 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 63 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 65 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 64 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 65 | void TickFrame() override; | ||
| 66 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 66 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 67 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 67 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 68 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 68 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| @@ -73,11 +73,6 @@ public: | |||
| 73 | void LoadDiskResources(const std::atomic_bool& stop_loading, | 73 | void LoadDiskResources(const std::atomic_bool& stop_loading, |
| 74 | const VideoCore::DiskResourceLoadCallback& callback) override; | 74 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| 75 | 75 | ||
| 76 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 77 | static constexpr std::size_t MaxConstbufferSize = 0x10000; | ||
| 78 | static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, | ||
| 79 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 80 | |||
| 81 | private: | 76 | private: |
| 82 | struct FramebufferConfigState { | 77 | struct FramebufferConfigState { |
| 83 | bool using_color_fb{}; | 78 | bool using_color_fb{}; |
| @@ -191,7 +186,6 @@ private: | |||
| 191 | 186 | ||
| 192 | TextureCacheOpenGL texture_cache; | 187 | TextureCacheOpenGL texture_cache; |
| 193 | ShaderCacheOpenGL shader_cache; | 188 | ShaderCacheOpenGL shader_cache; |
| 194 | GlobalRegionCacheOpenGL global_cache; | ||
| 195 | SamplerCacheOpenGL sampler_cache; | 189 | SamplerCacheOpenGL sampler_cache; |
| 196 | FramebufferCacheOpenGL framebuffer_cache; | 190 | FramebufferCacheOpenGL framebuffer_cache; |
| 197 | 191 | ||
| @@ -210,6 +204,7 @@ private: | |||
| 210 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 204 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 211 | OGLBufferCache buffer_cache; | 205 | OGLBufferCache buffer_cache; |
| 212 | 206 | ||
| 207 | VertexArrayPushBuffer vertex_array_pushbuffer; | ||
| 213 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; | 208 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; |
| 214 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; | 209 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; |
| 215 | 210 | ||
| @@ -222,7 +217,9 @@ private: | |||
| 222 | 217 | ||
| 223 | void SetupVertexBuffer(GLuint vao); | 218 | void SetupVertexBuffer(GLuint vao); |
| 224 | 219 | ||
| 225 | DrawParameters SetupDraw(); | 220 | GLintptr SetupIndexBuffer(); |
| 221 | |||
| 222 | DrawParameters SetupDraw(GLintptr index_buffer_offset); | ||
| 226 | 223 | ||
| 227 | void SetupShaders(GLenum primitive_mode); | 224 | void SetupShaders(GLenum primitive_mode); |
| 228 | 225 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index bfc975a04..47cc2011f 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -46,7 +46,7 @@ using TextureArgument = std::pair<Type, Node>; | |||
| 46 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; | 46 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; |
| 47 | 47 | ||
| 48 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 48 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 49 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | 49 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); |
| 50 | 50 | ||
| 51 | class ShaderWriter { | 51 | class ShaderWriter { |
| 52 | public: | 52 | public: |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b142521ec..9ecdddb0d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -101,7 +101,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst | |||
| 101 | 101 | ||
| 102 | RendererOpenGL::~RendererOpenGL() = default; | 102 | RendererOpenGL::~RendererOpenGL() = default; |
| 103 | 103 | ||
| 104 | /// Swap buffers (render frame) | ||
| 105 | void RendererOpenGL::SwapBuffers( | 104 | void RendererOpenGL::SwapBuffers( |
| 106 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | 105 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { |
| 107 | 106 | ||
| @@ -130,6 +129,8 @@ void RendererOpenGL::SwapBuffers( | |||
| 130 | 129 | ||
| 131 | DrawScreen(render_window.GetFramebufferLayout()); | 130 | DrawScreen(render_window.GetFramebufferLayout()); |
| 132 | 131 | ||
| 132 | rasterizer->TickFrame(); | ||
| 133 | |||
| 133 | render_window.SwapBuffers(); | 134 | render_window.SwapBuffers(); |
| 134 | } | 135 | } |
| 135 | 136 | ||
| @@ -262,7 +263,6 @@ void RendererOpenGL::CreateRasterizer() { | |||
| 262 | if (rasterizer) { | 263 | if (rasterizer) { |
| 263 | return; | 264 | return; |
| 264 | } | 265 | } |
| 265 | // Initialize sRGB Usage | ||
| 266 | OpenGLState::ClearsRGBUsed(); | 266 | OpenGLState::ClearsRGBUsed(); |
| 267 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); | 267 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); |
| 268 | } | 268 | } |
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 68c36988d..c504a2c1a 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp | |||
| @@ -13,29 +13,67 @@ | |||
| 13 | 13 | ||
| 14 | namespace OpenGL { | 14 | namespace OpenGL { |
| 15 | 15 | ||
| 16 | VertexArrayPushBuffer::VertexArrayPushBuffer() = default; | ||
| 17 | |||
| 18 | VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; | ||
| 19 | |||
| 20 | void VertexArrayPushBuffer::Setup(GLuint vao_) { | ||
| 21 | vao = vao_; | ||
| 22 | index_buffer = nullptr; | ||
| 23 | vertex_buffers.clear(); | ||
| 24 | } | ||
| 25 | |||
| 26 | void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) { | ||
| 27 | index_buffer = buffer; | ||
| 28 | } | ||
| 29 | |||
| 30 | void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer, | ||
| 31 | GLintptr offset, GLsizei stride) { | ||
| 32 | vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride}); | ||
| 33 | } | ||
| 34 | |||
| 35 | void VertexArrayPushBuffer::Bind() { | ||
| 36 | if (index_buffer) { | ||
| 37 | glVertexArrayElementBuffer(vao, *index_buffer); | ||
| 38 | } | ||
| 39 | |||
| 40 | // TODO(Rodrigo): Find a way to ARB_multi_bind this | ||
| 41 | for (const auto& entry : vertex_buffers) { | ||
| 42 | glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, | ||
| 43 | entry.stride); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 16 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} | 47 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} |
| 17 | 48 | ||
| 18 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; | 49 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; |
| 19 | 50 | ||
| 20 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { | 51 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { |
| 21 | first = first_; | 52 | first = first_; |
| 22 | buffers.clear(); | 53 | buffer_pointers.clear(); |
| 23 | offsets.clear(); | 54 | offsets.clear(); |
| 24 | sizes.clear(); | 55 | sizes.clear(); |
| 25 | } | 56 | } |
| 26 | 57 | ||
| 27 | void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { | 58 | void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { |
| 28 | buffers.push_back(buffer); | 59 | buffer_pointers.push_back(buffer); |
| 29 | offsets.push_back(offset); | 60 | offsets.push_back(offset); |
| 30 | sizes.push_back(size); | 61 | sizes.push_back(size); |
| 31 | } | 62 | } |
| 32 | 63 | ||
| 33 | void BindBuffersRangePushBuffer::Bind() const { | 64 | void BindBuffersRangePushBuffer::Bind() { |
| 34 | const std::size_t count{buffers.size()}; | 65 | // Ensure sizes are valid. |
| 66 | const std::size_t count{buffer_pointers.size()}; | ||
| 35 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); | 67 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); |
| 36 | if (count == 0) { | 68 | if (count == 0) { |
| 37 | return; | 69 | return; |
| 38 | } | 70 | } |
| 71 | |||
| 72 | // Dereference buffers. | ||
| 73 | buffers.resize(count); | ||
| 74 | std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(), | ||
| 75 | [](const GLuint* pointer) { return *pointer; }); | ||
| 76 | |||
| 39 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), | 77 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), |
| 40 | sizes.data()); | 78 | sizes.data()); |
| 41 | } | 79 | } |
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 4a752f3b4..6c2b45546 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h | |||
| @@ -11,20 +11,49 @@ | |||
| 11 | 11 | ||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | class BindBuffersRangePushBuffer { | 14 | class VertexArrayPushBuffer final { |
| 15 | public: | 15 | public: |
| 16 | BindBuffersRangePushBuffer(GLenum target); | 16 | explicit VertexArrayPushBuffer(); |
| 17 | ~VertexArrayPushBuffer(); | ||
| 18 | |||
| 19 | void Setup(GLuint vao_); | ||
| 20 | |||
| 21 | void SetIndexBuffer(const GLuint* buffer); | ||
| 22 | |||
| 23 | void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset, | ||
| 24 | GLsizei stride); | ||
| 25 | |||
| 26 | void Bind(); | ||
| 27 | |||
| 28 | private: | ||
| 29 | struct Entry { | ||
| 30 | GLuint binding_index{}; | ||
| 31 | const GLuint* buffer{}; | ||
| 32 | GLintptr offset{}; | ||
| 33 | GLsizei stride{}; | ||
| 34 | }; | ||
| 35 | |||
| 36 | GLuint vao{}; | ||
| 37 | const GLuint* index_buffer{}; | ||
| 38 | std::vector<Entry> vertex_buffers; | ||
| 39 | }; | ||
| 40 | |||
| 41 | class BindBuffersRangePushBuffer final { | ||
| 42 | public: | ||
| 43 | explicit BindBuffersRangePushBuffer(GLenum target); | ||
| 17 | ~BindBuffersRangePushBuffer(); | 44 | ~BindBuffersRangePushBuffer(); |
| 18 | 45 | ||
| 19 | void Setup(GLuint first_); | 46 | void Setup(GLuint first_); |
| 20 | 47 | ||
| 21 | void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); | 48 | void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); |
| 22 | 49 | ||
| 23 | void Bind() const; | 50 | void Bind(); |
| 24 | 51 | ||
| 25 | private: | 52 | private: |
| 26 | GLenum target; | 53 | GLenum target{}; |
| 27 | GLuint first; | 54 | GLuint first{}; |
| 55 | std::vector<const GLuint*> buffer_pointers; | ||
| 56 | |||
| 28 | std::vector<GLuint> buffers; | 57 | std::vector<GLuint> buffers; |
| 29 | std::vector<GLintptr> offsets; | 58 | std::vector<GLintptr> offsets; |
| 30 | std::vector<GLsizeiptr> sizes; | 59 | std::vector<GLsizeiptr> sizes; |