67 files changed, 2514 insertions(+), 2607 deletions(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index dd4c29ed3..9b931976a 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -2,10 +2,8 @@ add_subdirectory(host_shaders)
 
 add_library(video_core STATIC
     buffer_cache/buffer_base.h
-    buffer_cache/buffer_block.h
+    buffer_cache/buffer_cache.cpp
     buffer_cache/buffer_cache.h
-    buffer_cache/map_interval.cpp
-    buffer_cache/map_interval.h
     cdma_pusher.cpp
     cdma_pusher.h
     command_classes/codecs/codec.cpp
@@ -152,8 +150,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_staging_buffer_pool.h
     renderer_vulkan/vk_state_tracker.cpp
     renderer_vulkan/vk_state_tracker.h
-    renderer_vulkan/vk_stream_buffer.cpp
-    renderer_vulkan/vk_stream_buffer.h
     renderer_vulkan/vk_swapchain.cpp
     renderer_vulkan/vk_swapchain.h
     renderer_vulkan/vk_texture_cache.cpp
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
deleted file mode 100644
index e9306194a..000000000
--- a/src/video_core/buffer_cache/buffer_block.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace VideoCommon {
-
-class BufferBlock {
-public:
-    [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const {
-        return (cpu_addr < end) && (cpu_addr_end > start);
-    }
-
-    [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const {
-        return cpu_addr <= other_start && other_end <= cpu_addr_end;
-    }
-
-    [[nodiscard]] std::size_t Offset(VAddr in_addr) const {
-        return static_cast<std::size_t>(in_addr - cpu_addr);
-    }
-
-    [[nodiscard]] VAddr CpuAddr() const {
-        return cpu_addr;
-    }
-
-    [[nodiscard]] VAddr CpuAddrEnd() const {
-        return cpu_addr_end;
-    }
-
-    void SetCpuAddr(VAddr new_addr) {
-        cpu_addr = new_addr;
-        cpu_addr_end = new_addr + size;
-    }
-
-    [[nodiscard]] std::size_t Size() const {
-        return size;
-    }
-
-    [[nodiscard]] u64 Epoch() const {
-        return epoch;
-    }
-
-    void SetEpoch(u64 new_epoch) {
-        epoch = new_epoch;
-    }
-
-protected:
-    explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
-        SetCpuAddr(cpu_addr_);
-    }
-
-private:
-    VAddr cpu_addr{};
-    VAddr cpu_addr_end{};
-    std::size_t size{};
-    u64 epoch{};
-};
-
-} // namespace VideoCommon
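The removed BufferBlock tracked a host buffer as a half-open CPU address interval. A quick worked check of its Overlaps/IsInside predicates (my own numbers, not part of the patch):

    #include <cstdint>

    // Half-open interval [cpu_addr, cpu_addr_end) as used by the deleted class.
    constexpr std::uint64_t cpu_addr = 0x1000, cpu_addr_end = 0x2000;
    // Overlaps([0x1FFF, 0x3000)): the block starts before the other end and ends after its start.
    static_assert(cpu_addr < 0x3000 && cpu_addr_end > 0x1FFF);
    // A range that begins exactly at cpu_addr_end does not overlap.
    static_assert(!(cpu_addr < 0x5000 && cpu_addr_end > 0x2000));
    // IsInside([0x1200, 0x1800)): the range is fully contained in the block.
    static_assert(cpu_addr <= 0x1200 && 0x1800 <= cpu_addr_end);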
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
new file mode 100644
index 000000000..ab32294c8
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -0,0 +1,13 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/microprofile.h"
+
+namespace VideoCommon {
+
+MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
+
+} // namespace VideoCommon
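This new translation unit only exists to define each profiling timer once; the templated header declares and times against them. A minimal sketch of that pairing, using the same macros the patch uses (ExampleDownload is a hypothetical function):

    #include "common/microprofile.h"

    // Defined exactly once in buffer_cache.cpp:
    //   MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
    // Any header or template that wants to time work against that counter declares it:
    MICROPROFILE_DECLARE(GPU_DownloadMemory);

    void ExampleDownload() {
        MICROPROFILE_SCOPE(GPU_DownloadMemory); // time spent in this scope is attributed to the timer
        // ... copy buffer contents back to guest memory ...
    }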
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 83b9ee871..e4f3c8e35 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -4,591 +4,1231 @@
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <list> | 7 | #include <algorithm> |
| 8 | #include <array> | ||
| 9 | #include <deque> | ||
| 8 | #include <memory> | 10 | #include <memory> |
| 9 | #include <mutex> | 11 | #include <mutex> |
| 12 | #include <span> | ||
| 10 | #include <unordered_map> | 13 | #include <unordered_map> |
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | 14 | #include <vector> |
| 14 | 15 | ||
| 15 | #include <boost/container/small_vector.hpp> | 16 | #include <boost/container/small_vector.hpp> |
| 16 | #include <boost/icl/interval_set.hpp> | ||
| 17 | #include <boost/intrusive/set.hpp> | ||
| 18 | 17 | ||
| 19 | #include "common/alignment.h" | ||
| 20 | #include "common/assert.h" | ||
| 21 | #include "common/common_types.h" | 18 | #include "common/common_types.h" |
| 22 | #include "common/logging/log.h" | 19 | #include "common/div_ceil.h" |
| 23 | #include "core/core.h" | 20 | #include "common/microprofile.h" |
| 21 | #include "common/scope_exit.h" | ||
| 24 | #include "core/memory.h" | 22 | #include "core/memory.h" |
| 25 | #include "core/settings.h" | 23 | #include "core/settings.h" |
| 26 | #include "video_core/buffer_cache/buffer_block.h" | 24 | #include "video_core/buffer_cache/buffer_base.h" |
| 27 | #include "video_core/buffer_cache/map_interval.h" | 25 | #include "video_core/delayed_destruction_ring.h" |
| 26 | #include "video_core/dirty_flags.h" | ||
| 27 | #include "video_core/engines/kepler_compute.h" | ||
| 28 | #include "video_core/engines/maxwell_3d.h" | ||
| 28 | #include "video_core/memory_manager.h" | 29 | #include "video_core/memory_manager.h" |
| 29 | #include "video_core/rasterizer_interface.h" | 30 | #include "video_core/rasterizer_interface.h" |
| 31 | #include "video_core/texture_cache/slot_vector.h" | ||
| 32 | #include "video_core/texture_cache/types.h" | ||
| 30 | 33 | ||
| 31 | namespace VideoCommon { | 34 | namespace VideoCommon { |
| 32 | 35 | ||
| 33 | template <typename Buffer, typename BufferType, typename StreamBuffer> | 36 | MICROPROFILE_DECLARE(GPU_PrepareBuffers); |
| 37 | MICROPROFILE_DECLARE(GPU_BindUploadBuffers); | ||
| 38 | MICROPROFILE_DECLARE(GPU_DownloadMemory); | ||
| 39 | |||
| 40 | using BufferId = SlotId; | ||
| 41 | |||
| 42 | constexpr u32 NUM_VERTEX_BUFFERS = 32; | ||
| 43 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; | ||
| 44 | constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; | ||
| 45 | constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; | ||
| 46 | constexpr u32 NUM_STORAGE_BUFFERS = 16; | ||
| 47 | constexpr u32 NUM_STAGES = 5; | ||
| 48 | |||
| 49 | template <typename P> | ||
| 34 | class BufferCache { | 50 | class BufferCache { |
| 35 | using IntervalSet = boost::icl::interval_set<VAddr>; | 51 | // Page size for caching purposes. |
| 36 | using IntervalType = typename IntervalSet::interval_type; | 52 | // This is unrelated to the CPU page size and it can be changed as it seems optimal. |
| 37 | using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; | 53 | static constexpr u32 PAGE_BITS = 16; |
| 54 | static constexpr u64 PAGE_SIZE = u64{1} << PAGE_BITS; | ||
| 38 | 55 | ||
| 39 | static constexpr u64 WRITE_PAGE_BIT = 11; | 56 | static constexpr bool IS_OPENGL = P::IS_OPENGL; |
| 40 | static constexpr u64 BLOCK_PAGE_BITS = 21; | 57 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = |
| 41 | static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; | 58 | P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS; |
| 59 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = | ||
| 60 | P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; | ||
| 61 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; | ||
| 62 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; | ||
| 63 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | ||
| 42 | 64 | ||
| 43 | public: | 65 | static constexpr BufferId NULL_BUFFER_ID{0}; |
| 44 | struct BufferInfo { | 66 | |
| 45 | BufferType handle; | 67 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 46 | u64 offset; | 68 | |
| 47 | u64 address; | 69 | using Runtime = typename P::Runtime; |
| 70 | using Buffer = typename P::Buffer; | ||
| 71 | |||
| 72 | struct Empty {}; | ||
| 73 | |||
| 74 | struct Binding { | ||
| 75 | VAddr cpu_addr{}; | ||
| 76 | u32 size{}; | ||
| 77 | BufferId buffer_id; | ||
| 48 | }; | 78 | }; |
| 49 | 79 | ||
| 50 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | 80 | static constexpr Binding NULL_BINDING{ |
| 51 | bool is_written = false, bool use_fast_cbuf = false) { | 81 | .cpu_addr = 0, |
| 52 | std::lock_guard lock{mutex}; | 82 | .size = 0, |
| 83 | .buffer_id = NULL_BUFFER_ID, | ||
| 84 | }; | ||
| 53 | 85 | ||
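A short sketch of how the null binding is meant to be used (my own illustration; ResolveOrNull and the local stand-in types are hypothetical): bindings that cannot be resolved, for example an unmapped GPU address, degrade to NULL_BINDING, whose buffer_id names slot 0, and the constructor further down reserves that slot for an empty null buffer so slot_buffers[binding.buffer_id] is always a valid object.

    #include <cstdint>
    #include <optional>

    // Local stand-ins for the class types above, for illustration only.
    using VAddr = std::uint64_t;
    struct BufferId { std::uint32_t index = 0; };
    struct Binding { VAddr cpu_addr{}; std::uint32_t size{}; BufferId buffer_id; };
    constexpr Binding NULL_BINDING{.cpu_addr = 0, .size = 0, .buffer_id = BufferId{0}};

    // Hypothetical helper mirroring the pattern BindGraphicsUniformBuffer uses below:
    // anything unresolvable becomes the safe, size-zero null binding.
    Binding ResolveOrNull(std::optional<VAddr> cpu_addr, std::uint32_t size) {
        if (!cpu_addr) {
            return NULL_BINDING;
        }
        return Binding{.cpu_addr = *cpu_addr, .size = size, .buffer_id = BufferId{}};
    }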
| 54 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 86 | public: |
| 55 | if (!cpu_addr) { | 87 | static constexpr size_t SKIP_CACHE_SIZE = 4096; |
| 56 | return GetEmptyBuffer(size); | ||
| 57 | } | ||
| 58 | 88 | ||
| 59 | // Cache management is a big overhead, so only cache entries with a given size. | 89 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 60 | // TODO: Figure out which size is the best for given games. | 90 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 61 | constexpr std::size_t max_stream_size = 0x800; | 91 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 62 | if (use_fast_cbuf || size < max_stream_size) { | 92 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, |
| 63 | if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) { | 93 | Runtime& runtime_); |
| 64 | const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size); | ||
| 65 | if (use_fast_cbuf) { | ||
| 66 | u8* dest; | ||
| 67 | if (is_granular) { | ||
| 68 | dest = gpu_memory.GetPointer(gpu_addr); | ||
| 69 | } else { | ||
| 70 | staging_buffer.resize(size); | ||
| 71 | dest = staging_buffer.data(); | ||
| 72 | gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size); | ||
| 73 | } | ||
| 74 | return ConstBufferUpload(dest, size); | ||
| 75 | } | ||
| 76 | if (is_granular) { | ||
| 77 | u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); | ||
| 78 | return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) { | ||
| 79 | std::memcpy(dest, host_ptr, size); | ||
| 80 | }); | ||
| 81 | } else { | ||
| 82 | return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) { | ||
| 83 | gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size); | ||
| 84 | }); | ||
| 85 | } | ||
| 86 | } | ||
| 87 | } | ||
| 88 | 94 | ||
| 89 | Buffer* const block = GetBlock(*cpu_addr, size); | 95 | void TickFrame(); |
| 90 | MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size); | ||
| 91 | if (!map) { | ||
| 92 | return GetEmptyBuffer(size); | ||
| 93 | } | ||
| 94 | if (is_written) { | ||
| 95 | map->MarkAsModified(true, GetModifiedTicks()); | ||
| 96 | if (Settings::IsGPULevelHigh() && | ||
| 97 | Settings::values.use_asynchronous_gpu_emulation.GetValue()) { | ||
| 98 | MarkForAsyncFlush(map); | ||
| 99 | } | ||
| 100 | if (!map->is_written) { | ||
| 101 | map->is_written = true; | ||
| 102 | MarkRegionAsWritten(map->start, map->end - 1); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | 96 | ||
| 106 | return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()}; | 97 | void WriteMemory(VAddr cpu_addr, u64 size); |
| 107 | } | ||
| 108 | 98 | ||
| 109 | /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. | 99 | void CachedWriteMemory(VAddr cpu_addr, u64 size); |
| 110 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, | ||
| 111 | std::size_t alignment = 4) { | ||
| 112 | std::lock_guard lock{mutex}; | ||
| 113 | return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) { | ||
| 114 | std::memcpy(dest, raw_pointer, size); | ||
| 115 | }); | ||
| 116 | } | ||
| 117 | 100 | ||
| 118 | /// Prepares the buffer cache for data uploading | 101 | void DownloadMemory(VAddr cpu_addr, u64 size); |
| 119 | /// @param max_size Maximum number of bytes that will be uploaded | ||
| 120 | /// @return True when a stream buffer invalidation was required, false otherwise | ||
| 121 | void Map(std::size_t max_size) { | ||
| 122 | std::lock_guard lock{mutex}; | ||
| 123 | 102 | ||
| 124 | std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4); | 103 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); |
| 125 | buffer_offset = buffer_offset_base; | ||
| 126 | } | ||
| 127 | 104 | ||
| 128 | /// Finishes the upload stream | 105 | void UpdateGraphicsBuffers(bool is_indexed); |
| 129 | void Unmap() { | ||
| 130 | std::lock_guard lock{mutex}; | ||
| 131 | stream_buffer.Unmap(buffer_offset - buffer_offset_base); | ||
| 132 | } | ||
| 133 | 106 | ||
| 134 | /// Function called at the end of each frame, inteded for deferred operations | 107 | void UpdateComputeBuffers(); |
| 135 | void TickFrame() { | ||
| 136 | ++epoch; | ||
| 137 | 108 | ||
| 138 | while (!pending_destruction.empty()) { | 109 | void BindHostGeometryBuffers(bool is_indexed); |
| 139 | // Delay at least 4 frames before destruction. | ||
| 140 | // This is due to triple buffering happening on some drivers. | ||
| 141 | static constexpr u64 epochs_to_destroy = 5; | ||
| 142 | if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) { | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | pending_destruction.pop(); | ||
| 146 | } | ||
| 147 | } | ||
| 148 | 110 | ||
| 149 | /// Write any cached resources overlapping the specified region back to memory | 111 | void BindHostStageBuffers(size_t stage); |
| 150 | void FlushRegion(VAddr addr, std::size_t size) { | ||
| 151 | std::lock_guard lock{mutex}; | ||
| 152 | 112 | ||
| 153 | VectorMapInterval objects = GetMapsInRange(addr, size); | 113 | void BindHostComputeBuffers(); |
| 154 | std::sort(objects.begin(), objects.end(), | ||
| 155 | [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; }); | ||
| 156 | for (MapInterval* object : objects) { | ||
| 157 | if (object->is_modified && object->is_registered) { | ||
| 158 | mutex.unlock(); | ||
| 159 | FlushMap(object); | ||
| 160 | mutex.lock(); | ||
| 161 | } | ||
| 162 | } | ||
| 163 | } | ||
| 164 | 114 | ||
| 165 | bool MustFlushRegion(VAddr addr, std::size_t size) { | 115 | void SetEnabledUniformBuffers(size_t stage, u32 enabled); |
| 166 | std::lock_guard lock{mutex}; | ||
| 167 | 116 | ||
| 168 | const VectorMapInterval objects = GetMapsInRange(addr, size); | 117 | void SetEnabledComputeUniformBuffers(u32 enabled); |
| 169 | return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) { | ||
| 170 | return map->is_modified && map->is_registered; | ||
| 171 | }); | ||
| 172 | } | ||
| 173 | 118 | ||
| 174 | /// Mark the specified region as being invalidated | 119 | void UnbindGraphicsStorageBuffers(size_t stage); |
| 175 | void InvalidateRegion(VAddr addr, u64 size) { | ||
| 176 | std::lock_guard lock{mutex}; | ||
| 177 | 120 | ||
| 178 | for (auto& object : GetMapsInRange(addr, size)) { | 121 | void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, |
| 179 | if (object->is_registered) { | 122 | bool is_written); |
| 180 | Unregister(object); | ||
| 181 | } | ||
| 182 | } | ||
| 183 | } | ||
| 184 | 123 | ||
| 185 | void OnCPUWrite(VAddr addr, std::size_t size) { | 124 | void UnbindComputeStorageBuffers(); |
| 186 | std::lock_guard lock{mutex}; | ||
| 187 | 125 | ||
| 188 | for (MapInterval* object : GetMapsInRange(addr, size)) { | 126 | void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, |
| 189 | if (object->is_memory_marked && object->is_registered) { | 127 | bool is_written); |
| 190 | UnmarkMemory(object); | ||
| 191 | object->is_sync_pending = true; | ||
| 192 | marked_for_unregister.emplace_back(object); | ||
| 193 | } | ||
| 194 | } | ||
| 195 | } | ||
| 196 | 128 | ||
| 197 | void SyncGuestHost() { | 129 | void FlushCachedWrites(); |
| 198 | std::lock_guard lock{mutex}; | ||
| 199 | 130 | ||
| 200 | for (auto& object : marked_for_unregister) { | 131 | /// Return true when there are uncommitted buffers to be downloaded |
| 201 | if (object->is_registered) { | 132 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; |
| 202 | object->is_sync_pending = false; | 133 | |
| 203 | Unregister(object); | 134 | /// Return true when the caller should wait for async downloads |
| 204 | } | 135 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; |
| 136 | |||
| 137 | /// Commit asynchronous downloads | ||
| 138 | void CommitAsyncFlushes(); | ||
| 139 | |||
| 140 | /// Pop asynchronous downloads | ||
| 141 | void PopAsyncFlushes(); | ||
| 142 | |||
| 143 | /// Return true when a CPU region is modified from the GPU | ||
| 144 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 145 | |||
| 146 | std::mutex mutex; | ||
| 147 | |||
| 148 | private: | ||
| 149 | template <typename Func> | ||
| 150 | static void ForEachEnabledBit(u32 enabled_mask, Func&& func) { | ||
| 151 | for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) { | ||
| 152 | const int disabled_bits = std::countr_zero(enabled_mask); | ||
| 153 | index += disabled_bits; | ||
| 154 | enabled_mask >>= disabled_bits; | ||
| 155 | func(index); | ||
| 205 | } | 156 | } |
| 206 | marked_for_unregister.clear(); | ||
| 207 | } | 157 | } |
| 208 | 158 | ||
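ForEachEnabledBit visits only the set bits of a binding mask; std::countr_zero lets it skip a whole run of disabled bindings in one step instead of testing every bit. A standalone illustration (my own example, prints the enabled binding indices):

    #include <bit>
    #include <cstdio>

    int main() {
        unsigned mask = 0b1001'0100; // bindings 2, 4 and 7 enabled
        for (unsigned index = 0; mask != 0; ++index, mask >>= 1) {
            const int skip = std::countr_zero(mask); // length of the run of disabled bits
            index += skip;
            mask >>= skip;
            std::printf("enabled binding %u\n", index); // prints 2, 4, 7
        }
        return 0;
    }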
| 209 | void CommitAsyncFlushes() { | 159 | template <typename Func> |
| 210 | if (uncommitted_flushes) { | 160 | void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { |
| 211 | auto commit_list = std::make_shared<std::list<MapInterval*>>(); | 161 | const u64 page_end = Common::DivCeil(cpu_addr + size, PAGE_SIZE); |
| 212 | for (MapInterval* map : *uncommitted_flushes) { | 162 | for (u64 page = cpu_addr >> PAGE_BITS; page < page_end;) { |
| 213 | if (map->is_registered && map->is_modified) { | 163 | const BufferId buffer_id = page_table[page]; |
| 214 | // TODO(Blinkhawk): Implement backend asynchronous flushing | 164 | if (!buffer_id) { |
| 215 | // AsyncFlushMap(map) | 165 | ++page; |
| 216 | commit_list->push_back(map); | 166 | continue; |
| 217 | } | ||
| 218 | } | ||
| 219 | if (!commit_list->empty()) { | ||
| 220 | committed_flushes.push_back(commit_list); | ||
| 221 | } else { | ||
| 222 | committed_flushes.emplace_back(); | ||
| 223 | } | 167 | } |
| 224 | } else { | 168 | Buffer& buffer = slot_buffers[buffer_id]; |
| 225 | committed_flushes.emplace_back(); | 169 | func(buffer_id, buffer); |
| 170 | |||
| 171 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 172 | page = Common::DivCeil(end_addr, PAGE_SIZE); | ||
| 226 | } | 173 | } |
| 227 | uncommitted_flushes.reset(); | ||
| 228 | } | 174 | } |
| 229 | 175 | ||
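The page walk above is driven by Common::DivCeil: the loop runs from the page containing cpu_addr up to, but not including, the page just past the end of the range, and after a hit it jumps straight past the matched buffer. A worked example of the arithmetic (my own numbers):

    #include <cstdint>

    // A 0x180-byte write at CPU address 0x1FFC0 crosses a 64 KiB page boundary,
    // so the walk visits pages 1 and 2 of the table.
    constexpr std::uint64_t PAGE_BITS = 16;
    constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS;
    constexpr std::uint64_t addr = 0x1FFC0;
    constexpr std::uint64_t size = 0x180;
    static_assert((addr >> PAGE_BITS) == 1);                         // first page visited
    static_assert((addr + size + PAGE_SIZE - 1) / PAGE_SIZE == 3);   // DivCeil(addr + size, PAGE_SIZE)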
| 230 | bool ShouldWaitAsyncFlushes() const { | 176 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { |
| 231 | return !committed_flushes.empty() && committed_flushes.front() != nullptr; | 177 | return (cpu_addr & ~Core::Memory::PAGE_MASK) == |
| 178 | ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); | ||
| 232 | } | 179 | } |
| 233 | 180 | ||
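IsRangeGranular asks whether the whole range sits inside a single CPU page, i.e. whether it can be serviced through one contiguous host pointer. A small check, assuming the 4 KiB page size behind Core::Memory::PAGE_MASK (my own example):

    constexpr unsigned long long CPU_PAGE_MASK = 0xFFFULL; // assumed 4 KiB CPU pages
    // Start and (exclusive) end land in the same host page: granular.
    static_assert((0x1010ULL & ~CPU_PAGE_MASK) == ((0x1010ULL + 0x200) & ~CPU_PAGE_MASK));
    // The range spills into the next page: not granular.
    static_assert((0x1E00ULL & ~CPU_PAGE_MASK) != ((0x1E00ULL + 0x400) & ~CPU_PAGE_MASK));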
| 234 | bool HasUncommittedFlushes() const { | 181 | void BindHostIndexBuffer(); |
| 235 | return uncommitted_flushes != nullptr; | ||
| 236 | } | ||
| 237 | 182 | ||
| 238 | void PopAsyncFlushes() { | 183 | void BindHostVertexBuffers(); |
| 239 | if (committed_flushes.empty()) { | ||
| 240 | return; | ||
| 241 | } | ||
| 242 | auto& flush_list = committed_flushes.front(); | ||
| 243 | if (!flush_list) { | ||
| 244 | committed_flushes.pop_front(); | ||
| 245 | return; | ||
| 246 | } | ||
| 247 | for (MapInterval* map : *flush_list) { | ||
| 248 | if (map->is_registered) { | ||
| 249 | // TODO(Blinkhawk): Replace this for reading the asynchronous flush | ||
| 250 | FlushMap(map); | ||
| 251 | } | ||
| 252 | } | ||
| 253 | committed_flushes.pop_front(); | ||
| 254 | } | ||
| 255 | 184 | ||
| 256 | virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; | 185 | void BindHostGraphicsUniformBuffers(size_t stage); |
| 257 | 186 | ||
| 258 | protected: | 187 | void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); |
| 259 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 260 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | ||
| 261 | StreamBuffer& stream_buffer_) | ||
| 262 | : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, | ||
| 263 | stream_buffer{stream_buffer_} {} | ||
| 264 | 188 | ||
| 265 | ~BufferCache() = default; | 189 | void BindHostGraphicsStorageBuffers(size_t stage); |
| 266 | 190 | ||
| 267 | virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; | 191 | void BindHostTransformFeedbackBuffers(); |
| 268 | 192 | ||
| 269 | virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { | 193 | void BindHostComputeUniformBuffers(); |
| 270 | return {}; | ||
| 271 | } | ||
| 272 | 194 | ||
| 273 | /// Register an object into the cache | 195 | void BindHostComputeStorageBuffers(); |
| 274 | MapInterval* Register(MapInterval new_map, bool inherit_written = false) { | ||
| 275 | const VAddr cpu_addr = new_map.start; | ||
| 276 | if (!cpu_addr) { | ||
| 277 | LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", | ||
| 278 | new_map.gpu_addr); | ||
| 279 | return nullptr; | ||
| 280 | } | ||
| 281 | const std::size_t size = new_map.end - new_map.start; | ||
| 282 | new_map.is_registered = true; | ||
| 283 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||
| 284 | new_map.is_memory_marked = true; | ||
| 285 | if (inherit_written) { | ||
| 286 | MarkRegionAsWritten(new_map.start, new_map.end - 1); | ||
| 287 | new_map.is_written = true; | ||
| 288 | } | ||
| 289 | MapInterval* const storage = mapped_addresses_allocator.Allocate(); | ||
| 290 | *storage = new_map; | ||
| 291 | mapped_addresses.insert(*storage); | ||
| 292 | return storage; | ||
| 293 | } | ||
| 294 | 196 | ||
| 295 | void UnmarkMemory(MapInterval* map) { | 197 | void DoUpdateGraphicsBuffers(bool is_indexed); |
| 296 | if (!map->is_memory_marked) { | 198 | |
| 297 | return; | 199 | void DoUpdateComputeBuffers(); |
| 298 | } | 200 | |
| 299 | const std::size_t size = map->end - map->start; | 201 | void UpdateIndexBuffer(); |
| 300 | rasterizer.UpdatePagesCachedCount(map->start, size, -1); | 202 | |
| 301 | map->is_memory_marked = false; | 203 | void UpdateVertexBuffers(); |
| 302 | } | 204 | |
| 303 | 205 | void UpdateVertexBuffer(u32 index); | |
| 304 | /// Unregisters an object from the cache | 206 | |
| 305 | void Unregister(MapInterval* map) { | 207 | void UpdateUniformBuffers(size_t stage); |
| 306 | UnmarkMemory(map); | 208 | |
| 307 | map->is_registered = false; | 209 | void UpdateStorageBuffers(size_t stage); |
| 308 | if (map->is_sync_pending) { | 210 | |
| 309 | map->is_sync_pending = false; | 211 | void UpdateTransformFeedbackBuffers(); |
| 310 | marked_for_unregister.remove(map); | 212 | |
| 213 | void UpdateTransformFeedbackBuffer(u32 index); | ||
| 214 | |||
| 215 | void UpdateComputeUniformBuffers(); | ||
| 216 | |||
| 217 | void UpdateComputeStorageBuffers(); | ||
| 218 | |||
| 219 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | ||
| 220 | |||
| 221 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | ||
| 222 | |||
| 223 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | ||
| 224 | |||
| 225 | void Register(BufferId buffer_id); | ||
| 226 | |||
| 227 | void Unregister(BufferId buffer_id); | ||
| 228 | |||
| 229 | template <bool insert> | ||
| 230 | void ChangeRegister(BufferId buffer_id); | ||
| 231 | |||
| 232 | void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 233 | |||
| 234 | void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 235 | |||
| 236 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||
| 237 | std::span<BufferCopy> copies); | ||
| 238 | |||
| 239 | void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | ||
| 240 | std::span<const BufferCopy> copies); | ||
| 241 | |||
| 242 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, | ||
| 243 | std::span<const BufferCopy> copies); | ||
| 244 | |||
| 245 | void DeleteBuffer(BufferId buffer_id); | ||
| 246 | |||
| 247 | void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); | ||
| 248 | |||
| 249 | void NotifyBufferDeletion(); | ||
| 250 | |||
| 251 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; | ||
| 252 | |||
| 253 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | ||
| 254 | |||
| 255 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | ||
| 256 | |||
| 257 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; | ||
| 258 | |||
| 259 | VideoCore::RasterizerInterface& rasterizer; | ||
| 260 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 261 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 262 | Tegra::MemoryManager& gpu_memory; | ||
| 263 | Core::Memory::Memory& cpu_memory; | ||
| 264 | Runtime& runtime; | ||
| 265 | |||
| 266 | SlotVector<Buffer> slot_buffers; | ||
| 267 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | ||
| 268 | |||
| 269 | u32 last_index_count = 0; | ||
| 270 | |||
| 271 | Binding index_buffer; | ||
| 272 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; | ||
| 273 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; | ||
| 274 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | ||
| 275 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | ||
| 276 | |||
| 277 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | ||
| 278 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | ||
| 279 | |||
| 280 | std::array<u32, NUM_STAGES> enabled_uniform_buffers{}; | ||
| 281 | u32 enabled_compute_uniform_buffers = 0; | ||
| 282 | |||
| 283 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | ||
| 284 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | ||
| 285 | u32 enabled_compute_storage_buffers = 0; | ||
| 286 | u32 written_compute_storage_buffers = 0; | ||
| 287 | |||
| 288 | std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; | ||
| 289 | |||
| 290 | bool has_deleted_buffers = false; | ||
| 291 | |||
| 292 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> | ||
| 293 | dirty_uniform_buffers{}; | ||
| 294 | |||
| 295 | std::vector<BufferId> cached_write_buffer_ids; | ||
| 296 | |||
| 297 | // TODO: This data structure is not optimal and it should be reworked | ||
| 298 | std::vector<BufferId> uncommitted_downloads; | ||
| 299 | std::deque<std::vector<BufferId>> committed_downloads; | ||
| 300 | |||
| 301 | size_t immediate_buffer_capacity = 0; | ||
| 302 | std::unique_ptr<u8[]> immediate_buffer_alloc; | ||
| 303 | |||
| 304 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; | ||
| 305 | }; | ||
| 306 | |||
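A back-of-the-envelope check of the flat page_table declared above (my own arithmetic, assuming BufferId is a 32-bit slot index like the texture cache's SlotId): a 39-bit address space split into 64 KiB pages gives about 8 million entries, roughly 32 MiB of lookup table.

    static_assert(((1ULL << 39) >> 16) == 8'388'608);         // number of page table entries
    static_assert(((1ULL << 39) >> 16) * 4 == 32ULL << 20);   // ~32 MiB at 4 bytes per BufferId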
| 307 | template <class P> | ||
| 308 | BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 309 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 310 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 311 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | ||
| 312 | Runtime& runtime_) | ||
| 313 | : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, | ||
| 314 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { | ||
| 315 | // Ensure the first slot is used for the null buffer | ||
| 316 | void(slot_buffers.insert(runtime, NullBufferParams{})); | ||
| 317 | } | ||
| 318 | |||
| 319 | template <class P> | ||
| 320 | void BufferCache<P>::TickFrame() { | ||
| 321 | delayed_destruction_ring.Tick(); | ||
| 322 | } | ||
| 323 | |||
| 324 | template <class P> | ||
| 325 | void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { | ||
| 326 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | ||
| 327 | buffer.MarkRegionAsCpuModified(cpu_addr, size); | ||
| 328 | }); | ||
| 329 | } | ||
| 330 | |||
| 331 | template <class P> | ||
| 332 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | ||
| 333 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | ||
| 334 | if (!buffer.HasCachedWrites()) { | ||
| 335 | cached_write_buffer_ids.push_back(buffer_id); | ||
| 311 | } | 336 | } |
| 312 | if (map->is_written) { | 337 | buffer.CachedCpuWrite(cpu_addr, size); |
| 313 | UnmarkRegionAsWritten(map->start, map->end - 1); | 338 | }); |
| 339 | } | ||
| 340 | |||
| 341 | template <class P> | ||
| 342 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | ||
| 343 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | ||
| 344 | boost::container::small_vector<BufferCopy, 1> copies; | ||
| 345 | u64 total_size_bytes = 0; | ||
| 346 | u64 largest_copy = 0; | ||
| 347 | buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 348 | copies.push_back(BufferCopy{ | ||
| 349 | .src_offset = range_offset, | ||
| 350 | .dst_offset = total_size_bytes, | ||
| 351 | .size = range_size, | ||
| 352 | }); | ||
| 353 | total_size_bytes += range_size; | ||
| 354 | largest_copy = std::max(largest_copy, range_size); | ||
| 355 | }); | ||
| 356 | if (total_size_bytes == 0) { | ||
| 357 | return; | ||
| 314 | } | 358 | } |
| 315 | const auto it = mapped_addresses.find(*map); | 359 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
| 316 | ASSERT(it != mapped_addresses.end()); | 360 | |
| 317 | mapped_addresses.erase(it); | 361 | if constexpr (USE_MEMORY_MAPS) { |
| 318 | mapped_addresses_allocator.Release(map); | 362 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); |
| 319 | } | 363 | const u8* const mapped_memory = download_staging.mapped_span.data(); |
| 320 | 364 | const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); | |
| 321 | private: | 365 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); |
| 322 | MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { | 366 | runtime.Finish(); |
| 323 | const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); | 367 | for (const BufferCopy& copy : copies) { |
| 324 | if (overlaps.empty()) { | 368 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; |
| 325 | const VAddr cpu_addr_end = cpu_addr + size; | 369 | const u8* copy_mapped_memory = mapped_memory + copy.dst_offset; |
| 326 | if (gpu_memory.IsGranularRange(gpu_addr, size)) { | 370 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); |
| 327 | u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); | ||
| 328 | block->Upload(block->Offset(cpu_addr), size, host_ptr); | ||
| 329 | } else { | ||
| 330 | staging_buffer.resize(size); | ||
| 331 | gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | ||
| 332 | block->Upload(block->Offset(cpu_addr), size, staging_buffer.data()); | ||
| 333 | } | 371 | } |
| 334 | return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); | 372 | } else { |
| 335 | } | 373 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); |
| 336 | 374 | for (const BufferCopy& copy : copies) { | |
| 337 | const VAddr cpu_addr_end = cpu_addr + size; | 375 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); |
| 338 | if (overlaps.size() == 1) { | 376 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; |
| 339 | MapInterval* const current_map = overlaps[0]; | 377 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); |
| 340 | if (current_map->IsInside(cpu_addr, cpu_addr_end)) { | ||
| 341 | return current_map; | ||
| 342 | } | 378 | } |
| 343 | } | 379 | } |
| 344 | VAddr new_start = cpu_addr; | 380 | }); |
| 345 | VAddr new_end = cpu_addr_end; | 381 | } |
| 346 | bool write_inheritance = false; | 382 | |
| 347 | bool modified_inheritance = false; | 383 | template <class P> |
| 348 | // Calculate new buffer parameters | 384 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| 349 | for (MapInterval* overlap : overlaps) { | 385 | u32 size) { |
| 350 | new_start = std::min(overlap->start, new_start); | 386 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| 351 | new_end = std::max(overlap->end, new_end); | 387 | if (!cpu_addr) { |
| 352 | write_inheritance |= overlap->is_written; | 388 | uniform_buffers[stage][index] = NULL_BINDING; |
| 353 | modified_inheritance |= overlap->is_modified; | 389 | return; |
| 390 | } | ||
| 391 | const Binding binding{ | ||
| 392 | .cpu_addr = *cpu_addr, | ||
| 393 | .size = size, | ||
| 394 | .buffer_id = BufferId{}, | ||
| 395 | }; | ||
| 396 | uniform_buffers[stage][index] = binding; | ||
| 397 | } | ||
| 398 | |||
| 399 | template <class P> | ||
| 400 | void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) { | ||
| 401 | MICROPROFILE_SCOPE(GPU_PrepareBuffers); | ||
| 402 | do { | ||
| 403 | has_deleted_buffers = false; | ||
| 404 | DoUpdateGraphicsBuffers(is_indexed); | ||
| 405 | } while (has_deleted_buffers); | ||
| 406 | } | ||
| 407 | |||
| 408 | template <class P> | ||
| 409 | void BufferCache<P>::UpdateComputeBuffers() { | ||
| 410 | MICROPROFILE_SCOPE(GPU_PrepareBuffers); | ||
| 411 | do { | ||
| 412 | has_deleted_buffers = false; | ||
| 413 | DoUpdateComputeBuffers(); | ||
| 414 | } while (has_deleted_buffers); | ||
| 415 | } | ||
| 416 | |||
| 417 | template <class P> | ||
| 418 | void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) { | ||
| 419 | MICROPROFILE_SCOPE(GPU_BindUploadBuffers); | ||
| 420 | if (is_indexed) { | ||
| 421 | BindHostIndexBuffer(); | ||
| 422 | } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { | ||
| 423 | const auto& regs = maxwell3d.regs; | ||
| 424 | if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { | ||
| 425 | runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count); | ||
| 354 | } | 426 | } |
| 355 | GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; | 427 | } |
| 356 | for (auto& overlap : overlaps) { | 428 | BindHostVertexBuffers(); |
| 357 | Unregister(overlap); | 429 | BindHostTransformFeedbackBuffers(); |
| 430 | } | ||
| 431 | |||
| 432 | template <class P> | ||
| 433 | void BufferCache<P>::BindHostStageBuffers(size_t stage) { | ||
| 434 | MICROPROFILE_SCOPE(GPU_BindUploadBuffers); | ||
| 435 | BindHostGraphicsUniformBuffers(stage); | ||
| 436 | BindHostGraphicsStorageBuffers(stage); | ||
| 437 | } | ||
| 438 | |||
| 439 | template <class P> | ||
| 440 | void BufferCache<P>::BindHostComputeBuffers() { | ||
| 441 | MICROPROFILE_SCOPE(GPU_BindUploadBuffers); | ||
| 442 | BindHostComputeUniformBuffers(); | ||
| 443 | BindHostComputeStorageBuffers(); | ||
| 444 | } | ||
| 445 | |||
| 446 | template <class P> | ||
| 447 | void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) { | ||
| 448 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | ||
| 449 | if (enabled_uniform_buffers[stage] != enabled) { | ||
| 450 | dirty_uniform_buffers[stage] = ~u32{0}; | ||
| 358 | } | 451 | } |
| 359 | UpdateBlock(block, new_start, new_end, overlaps); | 452 | } |
| 360 | 453 | enabled_uniform_buffers[stage] = enabled; | |
| 361 | const MapInterval new_map{new_start, new_end, new_gpu_addr}; | 454 | } |
| 362 | MapInterval* const map = Register(new_map, write_inheritance); | 455 | |
| 363 | if (!map) { | 456 | template <class P> |
| 364 | return nullptr; | 457 | void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) { |
| 458 | enabled_compute_uniform_buffers = enabled; | ||
| 459 | } | ||
| 460 | |||
| 461 | template <class P> | ||
| 462 | void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) { | ||
| 463 | enabled_storage_buffers[stage] = 0; | ||
| 464 | written_storage_buffers[stage] = 0; | ||
| 465 | } | ||
| 466 | |||
| 467 | template <class P> | ||
| 468 | void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, | ||
| 469 | u32 cbuf_offset, bool is_written) { | ||
| 470 | enabled_storage_buffers[stage] |= 1U << ssbo_index; | ||
| 471 | written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index; | ||
| 472 | |||
| 473 | const auto& cbufs = maxwell3d.state.shader_stages[stage]; | ||
| 474 | const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset; | ||
| 475 | storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr); | ||
| 476 | } | ||
| 477 | |||
| 478 | template <class P> | ||
| 479 | void BufferCache<P>::UnbindComputeStorageBuffers() { | ||
| 480 | enabled_compute_storage_buffers = 0; | ||
| 481 | written_compute_storage_buffers = 0; | ||
| 482 | } | ||
| 483 | |||
| 484 | template <class P> | ||
| 485 | void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 486 | bool is_written) { | ||
| 487 | enabled_compute_storage_buffers |= 1U << ssbo_index; | ||
| 488 | written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; | ||
| 489 | |||
| 490 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 491 | ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); | ||
| 492 | |||
| 493 | const auto& cbufs = launch_desc.const_buffer_config; | ||
| 494 | const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset; | ||
| 495 | compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr); | ||
| 496 | } | ||
| 497 | |||
| 498 | template <class P> | ||
| 499 | void BufferCache<P>::FlushCachedWrites() { | ||
| 500 | for (const BufferId buffer_id : cached_write_buffer_ids) { | ||
| 501 | slot_buffers[buffer_id].FlushCachedWrites(); | ||
| 502 | } | ||
| 503 | cached_write_buffer_ids.clear(); | ||
| 504 | } | ||
| 505 | |||
| 506 | template <class P> | ||
| 507 | bool BufferCache<P>::HasUncommittedFlushes() const noexcept { | ||
| 508 | return !uncommitted_downloads.empty(); | ||
| 509 | } | ||
| 510 | |||
| 511 | template <class P> | ||
| 512 | bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { | ||
| 513 | return !committed_downloads.empty() && !committed_downloads.front().empty(); | ||
| 514 | } | ||
| 515 | |||
| 516 | template <class P> | ||
| 517 | void BufferCache<P>::CommitAsyncFlushes() { | ||
| 518 | // This is intentionally passing the value by copy | ||
| 519 | committed_downloads.push_front(uncommitted_downloads); | ||
| 520 | uncommitted_downloads.clear(); | ||
| 521 | } | ||
| 522 | |||
| 523 | template <class P> | ||
| 524 | void BufferCache<P>::PopAsyncFlushes() { | ||
| 525 | if (committed_downloads.empty()) { | ||
| 526 | return; | ||
| 527 | } | ||
| 528 | auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); }); | ||
| 529 | const std::span<const BufferId> download_ids = committed_downloads.back(); | ||
| 530 | if (download_ids.empty()) { | ||
| 531 | return; | ||
| 532 | } | ||
| 533 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 534 | |||
| 535 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; | ||
| 536 | u64 total_size_bytes = 0; | ||
| 537 | u64 largest_copy = 0; | ||
| 538 | for (const BufferId buffer_id : download_ids) { | ||
| 539 | slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) { | ||
| 540 | downloads.push_back({ | ||
| 541 | BufferCopy{ | ||
| 542 | .src_offset = range_offset, | ||
| 543 | .dst_offset = total_size_bytes, | ||
| 544 | .size = range_size, | ||
| 545 | }, | ||
| 546 | buffer_id, | ||
| 547 | }); | ||
| 548 | total_size_bytes += range_size; | ||
| 549 | largest_copy = std::max(largest_copy, range_size); | ||
| 550 | }); | ||
| 551 | } | ||
| 552 | if (downloads.empty()) { | ||
| 553 | return; | ||
| 554 | } | ||
| 555 | if constexpr (USE_MEMORY_MAPS) { | ||
| 556 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 557 | for (const auto [copy, buffer_id] : downloads) { | ||
| 558 | const std::array copies{copy}; | ||
| 559 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); | ||
| 365 | } | 560 | } |
| 366 | if (modified_inheritance) { | 561 | runtime.Finish(); |
| 367 | map->MarkAsModified(true, GetModifiedTicks()); | 562 | for (const auto [copy, buffer_id] : downloads) { |
| 368 | if (Settings::IsGPULevelHigh() && | 563 | const Buffer& buffer = slot_buffers[buffer_id]; |
| 369 | Settings::values.use_asynchronous_gpu_emulation.GetValue()) { | 564 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; |
| 370 | MarkForAsyncFlush(map); | 565 | const u8* read_mapped_memory = download_staging.mapped_span.data() + copy.dst_offset; |
| 371 | } | 566 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); |
| 567 | } | ||
| 568 | } else { | ||
| 569 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 570 | for (const auto [copy, buffer_id] : downloads) { | ||
| 571 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 572 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 573 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 574 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | ||
| 372 | } | 575 | } |
| 373 | return map; | ||
| 374 | } | 576 | } |
| 375 | 577 | } | |
| 376 | void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) { | 578 | |
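CommitAsyncFlushes pushes the pending download list onto the front of the deque and PopAsyncFlushes consumes from the back, so commits are drained oldest-first, and an empty commit still occupies a slot so the "nothing to wait for" case stays observable. A small illustration (my own example):

    #include <deque>
    #include <vector>

    int main() {
        using BufferId = unsigned;
        std::deque<std::vector<BufferId>> committed_downloads;
        committed_downloads.push_front({7}); // first commit: one buffer to download
        committed_downloads.push_front({});  // second commit: nothing to download
        // Consumption mirrors PopAsyncFlushes: the oldest commit (back) is handled first.
        const bool oldest_first = committed_downloads.back().size() == 1;
        committed_downloads.pop_back();
        const bool then_empty = committed_downloads.back().empty();
        return (oldest_first && then_empty) ? 0 : 1;
    }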
| 377 | const IntervalType base_interval{start, end}; | 579 | template <class P> |
| 378 | IntervalSet interval_set{}; | 580 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { |
| 379 | interval_set.add(base_interval); | 581 | const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); |
| 380 | for (auto& overlap : overlaps) { | 582 | for (u64 page = addr >> PAGE_BITS; page < page_end;) { |
| 381 | const IntervalType subtract{overlap->start, overlap->end}; | 583 | const BufferId image_id = page_table[page]; |
| 382 | interval_set.subtract(subtract); | 584 | if (!image_id) { |
| 585 | ++page; | ||
| 586 | continue; | ||
| 383 | } | 587 | } |
| 384 | for (auto& interval : interval_set) { | 588 | Buffer& buffer = slot_buffers[image_id]; |
| 385 | const std::size_t size = interval.upper() - interval.lower(); | 589 | if (buffer.IsRegionGpuModified(addr, size)) { |
| 386 | if (size == 0) { | 590 | return true; |
| 387 | continue; | ||
| 388 | } | ||
| 389 | staging_buffer.resize(size); | ||
| 390 | cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); | ||
| 391 | block->Upload(block->Offset(interval.lower()), size, staging_buffer.data()); | ||
| 392 | } | 591 | } |
| 592 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 593 | page = Common::DivCeil(end_addr, PAGE_SIZE); | ||
| 393 | } | 594 | } |
| 394 | 595 | return false; | |
| 395 | VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) { | 596 | } |
| 396 | VectorMapInterval result; | 597 | |
| 397 | if (size == 0) { | 598 | template <class P> |
| 398 | return result; | 599 | void BufferCache<P>::BindHostIndexBuffer() { |
| 600 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; | ||
| 601 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); | ||
| 602 | const u32 size = index_buffer.size; | ||
| 603 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); | ||
| 604 | if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { | ||
| 605 | runtime.BindIndexBuffer(buffer, offset, size); | ||
| 606 | } else { | ||
| 607 | runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format, | ||
| 608 | maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count, | ||
| 609 | buffer, offset, size); | ||
| 610 | } | ||
| 611 | } | ||
| 612 | |||
| 613 | template <class P> | ||
| 614 | void BufferCache<P>::BindHostVertexBuffers() { | ||
| 615 | auto& flags = maxwell3d.dirty.flags; | ||
| 616 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { | ||
| 617 | const Binding& binding = vertex_buffers[index]; | ||
| 618 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 619 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | ||
| 620 | if (!flags[Dirty::VertexBuffer0 + index]) { | ||
| 621 | continue; | ||
| 399 | } | 622 | } |
| 623 | flags[Dirty::VertexBuffer0 + index] = false; | ||
| 624 | |||
| 625 | const u32 stride = maxwell3d.regs.vertex_array[index].stride; | ||
| 626 | const u32 offset = buffer.Offset(binding.cpu_addr); | ||
| 627 | runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride); | ||
| 628 | } | ||
| 629 | } | ||
| 400 | 630 | ||
| 401 | const VAddr addr_end = addr + size; | 631 | template <class P> |
| 402 | auto it = mapped_addresses.lower_bound(addr); | 632 | void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { |
| 403 | if (it != mapped_addresses.begin()) { | 633 | u32 dirty = ~0U; |
| 404 | --it; | 634 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { |
| 635 | dirty = std::exchange(dirty_uniform_buffers[stage], 0); | ||
| 636 | } | ||
| 637 | u32 binding_index = 0; | ||
| 638 | ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { | ||
| 639 | const bool needs_bind = ((dirty >> index) & 1) != 0; | ||
| 640 | BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); | ||
| 641 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { | ||
| 642 | ++binding_index; | ||
| 405 | } | 643 | } |
| 406 | while (it != mapped_addresses.end() && it->start < addr_end) { | 644 | }); |
| 407 | if (it->Overlaps(addr, addr_end)) { | 645 | } |
| 408 | result.push_back(&*it); | 646 | |
| 647 | template <class P> | ||
| 648 | void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, | ||
| 649 | bool needs_bind) { | ||
| 650 | const Binding& binding = uniform_buffers[stage][index]; | ||
| 651 | const VAddr cpu_addr = binding.cpu_addr; | ||
| 652 | const u32 size = binding.size; | ||
| 653 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 654 | if constexpr (IS_OPENGL) { | ||
| 655 | if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { | ||
| 656 | if (runtime.HasFastBufferSubData()) { | ||
| 657 | // Fast path for Nvidia | ||
| 658 | if (!HasFastUniformBufferBound(stage, binding_index)) { | ||
| 659 | // We only have to bind when the currently bound buffer is not the fast version | ||
| 660 | fast_bound_uniform_buffers[stage] |= 1U << binding_index; | ||
| 661 | runtime.BindFastUniformBuffer(stage, binding_index, size); | ||
| 662 | } | ||
| 663 | const auto span = ImmediateBufferWithData(cpu_addr, size); | ||
| 664 | runtime.PushFastUniformBuffer(stage, binding_index, span); | ||
| 665 | } else { | ||
| 666 | // Stream buffer path to avoid stalling on non-Nvidia drivers | ||
| 667 | const auto span = runtime.BindMappedUniformBuffer(stage, binding_index, size); | ||
| 668 | cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); | ||
| 409 | } | 669 | } |
| 410 | ++it; | 670 | return; |
| 411 | } | 671 | } |
| 412 | return result; | ||
| 413 | } | 672 | } |
| 414 | 673 | // Classic cached path | |
| 415 | /// Returns a ticks counter used for tracking when cached objects were last modified | 674 | SynchronizeBuffer(buffer, cpu_addr, size); |
| 416 | u64 GetModifiedTicks() { | 675 | if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { |
| 417 | return ++modified_ticks; | 676 | // Skip binding if it's not needed and if the bound buffer is not the fast version |
| 677 | // This exists to avoid instances where the fast buffer is bound and a GPU write happens | ||
| 678 | return; | ||
| 418 | } | 679 | } |
| 680 | fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); | ||
| 419 | 681 | ||
| 420 | void FlushMap(MapInterval* map) { | 682 | const u32 offset = buffer.Offset(cpu_addr); |
| 421 | const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS); | 683 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { |
| 422 | ASSERT_OR_EXECUTE(it != blocks.end(), return;); | 684 | runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); |
| 423 | 685 | } else { | |
| 424 | std::shared_ptr<Buffer> block = it->second; | 686 | runtime.BindUniformBuffer(buffer, offset, size); |
| 425 | |||
| 426 | const std::size_t size = map->end - map->start; | ||
| 427 | staging_buffer.resize(size); | ||
| 428 | block->Download(block->Offset(map->start), size, staging_buffer.data()); | ||
| 429 | cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size); | ||
| 430 | map->MarkAsModified(false, 0); | ||
| 431 | } | 687 | } |
| 688 | } | ||
| 689 | |||
| 690 | template <class P> | ||
| 691 | void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | ||
| 692 | u32 binding_index = 0; | ||
| 693 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { | ||
| 694 | const Binding& binding = storage_buffers[stage][index]; | ||
| 695 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 696 | const u32 size = binding.size; | ||
| 697 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | ||
| 698 | |||
| 699 | const u32 offset = buffer.Offset(binding.cpu_addr); | ||
| 700 | const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0; | ||
| 701 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { | ||
| 702 | runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written); | ||
| 703 | ++binding_index; | ||
| 704 | } else { | ||
| 705 | runtime.BindStorageBuffer(buffer, offset, size, is_written); | ||
| 706 | } | ||
| 707 | }); | ||
| 708 | } | ||
| 432 | 709 | ||
| 433 | template <typename Callable> | 710 | template <class P> |
| 434 | BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) { | 711 | void BufferCache<P>::BindHostTransformFeedbackBuffers() { |
| 435 | AlignBuffer(alignment); | 712 | if (maxwell3d.regs.tfb_enabled == 0) { |
| 436 | const std::size_t uploaded_offset = buffer_offset; | 713 | return; |
| 437 | callable(buffer_ptr); | ||
| 438 | |||
| 439 | buffer_ptr += size; | ||
| 440 | buffer_offset += size; | ||
| 441 | return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()}; | ||
| 442 | } | 714 | } |
| 443 | 715 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { | |
| 444 | void AlignBuffer(std::size_t alignment) { | 716 | const Binding& binding = transform_feedback_buffers[index]; |
| 445 | // Align the offset, not the mapped pointer | 717 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 446 | const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); | 718 | const u32 size = binding.size; |
| 447 | buffer_ptr += offset_aligned - buffer_offset; | 719 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 448 | buffer_offset = offset_aligned; | 720 | |
| 721 | const u32 offset = buffer.Offset(binding.cpu_addr); | ||
| 722 | runtime.BindTransformFeedbackBuffer(index, buffer, offset, size); | ||
| 449 | } | 723 | } |
| 724 | } | ||
| 450 | 725 | ||
| 451 | std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) { | 726 | template <class P> |
| 452 | const std::size_t old_size = buffer->Size(); | 727 | void BufferCache<P>::BindHostComputeUniformBuffers() { |
| 453 | const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; | 728 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { |
| 454 | const VAddr cpu_addr = buffer->CpuAddr(); | 729 | // Mark all uniform buffers as dirty |
| 455 | std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); | 730 | dirty_uniform_buffers.fill(~u32{0}); |
| 456 | new_buffer->CopyFrom(*buffer, 0, 0, old_size); | 731 | } |
| 457 | QueueDestruction(std::move(buffer)); | 732 | u32 binding_index = 0; |
| 458 | 733 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | |
| 459 | const VAddr cpu_addr_end = cpu_addr + new_size - 1; | 734 | const Binding& binding = compute_uniform_buffers[index]; |
| 460 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; | 735 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 461 | for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { | 736 | const u32 size = binding.size; |
| 462 | blocks.insert_or_assign(page_start, new_buffer); | 737 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 738 | |||
| 739 | const u32 offset = buffer.Offset(binding.cpu_addr); | ||
| 740 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { | ||
| 741 | runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); | ||
| 742 | ++binding_index; | ||
| 743 | } else { | ||
| 744 | runtime.BindUniformBuffer(buffer, offset, size); | ||
| 463 | } | 745 | } |
| 746 | }); | ||
| 747 | } | ||
| 748 | |||
| 749 | template <class P> | ||
| 750 | void BufferCache<P>::BindHostComputeStorageBuffers() { | ||
| 751 | u32 binding_index = 0; | ||
| 752 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | ||
| 753 | const Binding& binding = compute_storage_buffers[index]; | ||
| 754 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 755 | const u32 size = binding.size; | ||
| 756 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | ||
| 757 | |||
| 758 | const u32 offset = buffer.Offset(binding.cpu_addr); | ||
| 759 | const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0; | ||
| 760 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { | ||
| 761 | runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written); | ||
| 762 | ++binding_index; | ||
| 763 | } else { | ||
| 764 | runtime.BindStorageBuffer(buffer, offset, size, is_written); | ||
| 765 | } | ||
| 766 | }); | ||
| 767 | } | ||
| 464 | 768 | ||
| 465 | return new_buffer; | 769 | template <class P> |
| 770 | void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { | ||
| 771 | if (is_indexed) { | ||
| 772 | UpdateIndexBuffer(); | ||
| 466 | } | 773 | } |
| 774 | UpdateVertexBuffers(); | ||
| 775 | UpdateTransformFeedbackBuffers(); | ||
| 776 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { | ||
| 777 | UpdateUniformBuffers(stage); | ||
| 778 | UpdateStorageBuffers(stage); | ||
| 779 | } | ||
| 780 | } | ||
| 781 | |||
| 782 | template <class P> | ||
| 783 | void BufferCache<P>::DoUpdateComputeBuffers() { | ||
| 784 | UpdateComputeUniformBuffers(); | ||
| 785 | UpdateComputeStorageBuffers(); | ||
| 786 | } | ||
| 787 | |||
| 788 | template <class P> | ||
| 789 | void BufferCache<P>::UpdateIndexBuffer() { | ||
| 790 | // We have to check for the dirty flags and index count | ||
| 791 | // The index count is currently changed without updating the dirty flags | ||
| 792 | const auto& index_array = maxwell3d.regs.index_array; | ||
| 793 | auto& flags = maxwell3d.dirty.flags; | ||
| 794 | if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) { | ||
| 795 | return; | ||
| 796 | } | ||
| 797 | flags[Dirty::IndexBuffer] = false; | ||
| 798 | last_index_count = index_array.count; | ||
| 799 | |||
| 800 | const GPUVAddr gpu_addr_begin = index_array.StartAddress(); | ||
| 801 | const GPUVAddr gpu_addr_end = index_array.EndAddress(); | ||
| 802 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); | ||
| 803 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | ||
| 804 | const u32 draw_size = index_array.count * index_array.FormatSizeInBytes(); | ||
| 805 | const u32 size = std::min(address_size, draw_size); | ||
| 806 | if (size == 0 || !cpu_addr) { | ||
| 807 | index_buffer = NULL_BINDING; | ||
| 808 | return; | ||
| 809 | } | ||
| 810 | index_buffer = Binding{ | ||
| 811 | .cpu_addr = *cpu_addr, | ||
| 812 | .size = size, | ||
| 813 | .buffer_id = FindBuffer(*cpu_addr, size), | ||
| 814 | }; | ||
| 815 | } | ||
| 467 | 816 | ||
| 468 | std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first, | 817 | template <class P> |
| 469 | std::shared_ptr<Buffer> second) { | 818 | void BufferCache<P>::UpdateVertexBuffers() { |
| 470 | const std::size_t size_1 = first->Size(); | 819 | auto& flags = maxwell3d.dirty.flags; |
| 471 | const std::size_t size_2 = second->Size(); | 820 | if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) { |
| 472 | const VAddr first_addr = first->CpuAddr(); | 821 | return; |
| 473 | const VAddr second_addr = second->CpuAddr(); | 822 | } |
| 474 | const VAddr new_addr = std::min(first_addr, second_addr); | 823 | flags[Dirty::VertexBuffers] = false; |
| 475 | const std::size_t new_size = size_1 + size_2; | ||
| 476 | |||
| 477 | std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size); | ||
| 478 | new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1); | ||
| 479 | new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2); | ||
| 480 | QueueDestruction(std::move(first)); | ||
| 481 | QueueDestruction(std::move(second)); | ||
| 482 | 824 | ||
| 483 | const VAddr cpu_addr_end = new_addr + new_size - 1; | 825 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { |
| 484 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; | 826 | UpdateVertexBuffer(index); |
| 485 | for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { | ||
| 486 | blocks.insert_or_assign(page_start, new_buffer); | ||
| 487 | } | ||
| 488 | return new_buffer; | ||
| 489 | } | 827 | } |
| 828 | } | ||
| 490 | 829 | ||
| 491 | Buffer* GetBlock(VAddr cpu_addr, std::size_t size) { | 830 | template <class P> |
| 492 | std::shared_ptr<Buffer> found; | 831 | void BufferCache<P>::UpdateVertexBuffer(u32 index) { |
| 832 | if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) { | ||
| 833 | return; | ||
| 834 | } | ||
| 835 | const auto& array = maxwell3d.regs.vertex_array[index]; | ||
| 836 | const auto& limit = maxwell3d.regs.vertex_array_limit[index]; | ||
| 837 | const GPUVAddr gpu_addr_begin = array.StartAddress(); | ||
| 838 | const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1; | ||
| 839 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); | ||
| 840 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | ||
| 841 | const u32 size = address_size; // TODO: Analyze stride and number of vertices | ||
| 842 | if (array.enable == 0 || size == 0 || !cpu_addr) { | ||
| 843 | vertex_buffers[index] = NULL_BINDING; | ||
| 844 | return; | ||
| 845 | } | ||
| 846 | vertex_buffers[index] = Binding{ | ||
| 847 | .cpu_addr = *cpu_addr, | ||
| 848 | .size = size, | ||
| 849 | .buffer_id = FindBuffer(*cpu_addr, size), | ||
| 850 | }; | ||
| 851 | } | ||
| 852 | |||
| 853 | template <class P> | ||
| 854 | void BufferCache<P>::UpdateUniformBuffers(size_t stage) { | ||
| 855 | ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { | ||
| 856 | Binding& binding = uniform_buffers[stage][index]; | ||
| 857 | if (binding.buffer_id) { | ||
| 858 | // Already updated | ||
| 859 | return; | ||
| 860 | } | ||
| 861 | // Mark as dirty | ||
| 862 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | ||
| 863 | dirty_uniform_buffers[stage] |= 1U << index; | ||
| 864 | } | ||
| 865 | // Resolve buffer | ||
| 866 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | ||
| 867 | }); | ||
| 868 | } | ||
| 869 | |||
| 870 | template <class P> | ||
| 871 | void BufferCache<P>::UpdateStorageBuffers(size_t stage) { | ||
| 872 | const u32 written_mask = written_storage_buffers[stage]; | ||
| 873 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { | ||
| 874 | // Resolve buffer | ||
| 875 | Binding& binding = storage_buffers[stage][index]; | ||
| 876 | const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); | ||
| 877 | binding.buffer_id = buffer_id; | ||
| 878 | // Mark buffer as written if needed | ||
| 879 | if (((written_mask >> index) & 1) != 0) { | ||
| 880 | MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); | ||
| 881 | } | ||
| 882 | }); | ||
| 883 | } | ||
| 493 | 884 | ||
| 494 | const VAddr cpu_addr_end = cpu_addr + size - 1; | 885 | template <class P> |
| 495 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; | 886 | void BufferCache<P>::UpdateTransformFeedbackBuffers() { |
| 496 | for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { | 887 | if (maxwell3d.regs.tfb_enabled == 0) { |
| 497 | auto it = blocks.find(page_start); | 888 | return; |
| 498 | if (it == blocks.end()) { | 889 | } |
| 499 | if (found) { | 890 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { |
| 500 | found = EnlargeBlock(found); | 891 | UpdateTransformFeedbackBuffer(index); |
| 501 | continue; | 892 | } |
| 502 | } | 893 | } |
| 503 | const VAddr start_addr = page_start << BLOCK_PAGE_BITS; | 894 | |
| 504 | found = CreateBlock(start_addr, BLOCK_PAGE_SIZE); | 895 | template <class P> |
| 505 | blocks.insert_or_assign(page_start, found); | 896 | void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { |
| 506 | continue; | 897 | const auto& binding = maxwell3d.regs.tfb_bindings[index]; |
| 507 | } | 898 | const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset; |
| 508 | if (!found) { | 899 | const u32 size = binding.buffer_size; |
| 509 | found = it->second; | 900 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| 510 | continue; | 901 | if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) { |
| 511 | } | 902 | transform_feedback_buffers[index] = NULL_BINDING; |
| 512 | if (found != it->second) { | 903 | return; |
| 513 | found = MergeBlocks(std::move(found), it->second); | 904 | } |
| 905 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); | ||
| 906 | transform_feedback_buffers[index] = Binding{ | ||
| 907 | .cpu_addr = *cpu_addr, | ||
| 908 | .size = size, | ||
| 909 | .buffer_id = buffer_id, | ||
| 910 | }; | ||
| 911 | MarkWrittenBuffer(buffer_id, *cpu_addr, size); | ||
| 912 | } | ||
| 913 | |||
| 914 | template <class P> | ||
| 915 | void BufferCache<P>::UpdateComputeUniformBuffers() { | ||
| 916 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | ||
| 917 | Binding& binding = compute_uniform_buffers[index]; | ||
| 918 | binding = NULL_BINDING; | ||
| 919 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 920 | if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { | ||
| 921 | const auto& cbuf = launch_desc.const_buffer_config[index]; | ||
| 922 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address()); | ||
| 923 | if (cpu_addr) { | ||
| 924 | binding.cpu_addr = *cpu_addr; | ||
| 925 | binding.size = cbuf.size; | ||
| 514 | } | 926 | } |
| 515 | } | 927 | } |
| 516 | return found.get(); | 928 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); |
| 929 | }); | ||
| 930 | } | ||
| 931 | |||
| 932 | template <class P> | ||
| 933 | void BufferCache<P>::UpdateComputeStorageBuffers() { | ||
| 934 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | ||
| 935 | // Resolve buffer | ||
| 936 | Binding& binding = compute_storage_buffers[index]; | ||
| 937 | const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); | ||
| 938 | binding.buffer_id = buffer_id; | ||
| 939 | // Mark as written if needed | ||
| 940 | if (((written_compute_storage_buffers >> index) & 1) != 0) { | ||
| 941 | MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); | ||
| 942 | } | ||
| 943 | }); | ||
| 944 | } | ||
| 945 | |||
| 946 | template <class P> | ||
| 947 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { | ||
| 948 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 949 | buffer.MarkRegionAsGpuModified(cpu_addr, size); | ||
| 950 | |||
| 951 | const bool is_accuracy_high = Settings::IsGPULevelHigh(); | ||
| 952 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | ||
| 953 | if (!is_accuracy_high || !is_async) { | ||
| 954 | return; | ||
| 517 | } | 955 | } |
| 956 | if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) { | ||
| 957 | // Already inserted | ||
| 958 | return; | ||
| 959 | } | ||
| 960 | uncommitted_downloads.push_back(buffer_id); | ||
| 961 | } | ||
| 518 | 962 | ||
| 519 | void MarkRegionAsWritten(VAddr start, VAddr end) { | 963 | template <class P> |
| 520 | const u64 page_end = end >> WRITE_PAGE_BIT; | 964 | BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { |
| 521 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { | 965 | if (cpu_addr == 0) { |
| 522 | if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) { | 966 | return NULL_BUFFER_ID; |
| 523 | ++it->second; | 967 | } |
| 524 | } | 968 | const u64 page = cpu_addr >> PAGE_BITS; |
| 969 | const BufferId buffer_id = page_table[page]; | ||
| 970 | if (!buffer_id) { | ||
| 971 | return CreateBuffer(cpu_addr, size); | ||
| 972 | } | ||
| 973 | const Buffer& buffer = slot_buffers[buffer_id]; | ||
| 974 | if (buffer.IsInBounds(cpu_addr, size)) { | ||
| 975 | return buffer_id; | ||
| 976 | } | ||
| 977 | return CreateBuffer(cpu_addr, size); | ||
| 978 | } | ||
| 979 | |||
| 980 | template <class P> | ||
| 981 | BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | ||
| 982 | std::vector<BufferId> overlap_ids; | ||
| 983 | VAddr cpu_addr_begin = cpu_addr; | ||
| 984 | VAddr cpu_addr_end = cpu_addr + wanted_size; | ||
| 985 | for (; cpu_addr >> PAGE_BITS < Common::DivCeil(cpu_addr_end, PAGE_SIZE); | ||
| 986 | cpu_addr += PAGE_SIZE) { | ||
| 987 | const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS]; | ||
| 988 | if (!overlap_id) { | ||
| 989 | continue; | ||
| 990 | } | ||
| 991 | Buffer& overlap = slot_buffers[overlap_id]; | ||
| 992 | if (overlap.IsPicked()) { | ||
| 993 | continue; | ||
| 994 | } | ||
| 995 | overlap.Pick(); | ||
| 996 | overlap_ids.push_back(overlap_id); | ||
| 997 | const VAddr overlap_cpu_addr = overlap.CpuAddr(); | ||
| 998 | if (overlap_cpu_addr < cpu_addr_begin) { | ||
| 999 | cpu_addr = cpu_addr_begin = overlap_cpu_addr; | ||
| 525 | } | 1000 | } |
| 1001 | cpu_addr_end = std::max(cpu_addr_end, overlap_cpu_addr + overlap.SizeBytes()); | ||
| 526 | } | 1002 | } |
| 527 | 1003 | const u32 size = static_cast<u32>(cpu_addr_end - cpu_addr_begin); | |
| 528 | void UnmarkRegionAsWritten(VAddr start, VAddr end) { | 1004 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, cpu_addr_begin, size); |
| 529 | const u64 page_end = end >> WRITE_PAGE_BIT; | 1005 | Buffer& new_buffer = slot_buffers[new_buffer_id]; |
| 530 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { | 1006 | |
| 531 | auto it = written_pages.find(page_start); | 1007 | for (const BufferId overlap_id : overlap_ids) { |
| 532 | if (it != written_pages.end()) { | 1008 | Buffer& overlap = slot_buffers[overlap_id]; |
| 533 | if (it->second > 1) { | 1009 | overlap.Unpick(); |
| 534 | --it->second; | 1010 | |
| 535 | } else { | 1011 | std::vector<BufferCopy> copies; |
| 536 | written_pages.erase(it); | 1012 | const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); |
| 537 | } | 1013 | overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) { |
| 538 | } | 1014 | copies.push_back(BufferCopy{ |
| 1015 | .src_offset = begin, | ||
| 1016 | .dst_offset = dst_base_offset + begin, | ||
| 1017 | .size = range_size, | ||
| 1018 | }); | ||
| 1019 | new_buffer.UnmarkRegionAsCpuModified(begin, range_size); | ||
| 1020 | new_buffer.MarkRegionAsGpuModified(begin, range_size); | ||
| 1021 | }); | ||
| 1022 | if (!copies.empty()) { | ||
| 1023 | runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies); | ||
| 1024 | } | ||
| 1025 | ReplaceBufferDownloads(overlap_id, new_buffer_id); | ||
| 1026 | DeleteBuffer(overlap_id); | ||
| 1027 | } | ||
| 1028 | Register(new_buffer_id); | ||
| 1029 | return new_buffer_id; | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | template <class P> | ||
| 1033 | void BufferCache<P>::Register(BufferId buffer_id) { | ||
| 1034 | ChangeRegister<true>(buffer_id); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | template <class P> | ||
| 1038 | void BufferCache<P>::Unregister(BufferId buffer_id) { | ||
| 1039 | ChangeRegister<false>(buffer_id); | ||
| 1040 | } | ||
| 1041 | |||
| 1042 | template <class P> | ||
| 1043 | template <bool insert> | ||
| 1044 | void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | ||
| 1045 | const Buffer& buffer = slot_buffers[buffer_id]; | ||
| 1046 | const VAddr cpu_addr_begin = buffer.CpuAddr(); | ||
| 1047 | const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes(); | ||
| 1048 | const u64 page_begin = cpu_addr_begin / PAGE_SIZE; | ||
| 1049 | const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE); | ||
| 1050 | for (u64 page = page_begin; page != page_end; ++page) { | ||
| 1051 | if constexpr (insert) { | ||
| 1052 | page_table[page] = buffer_id; | ||
| 1053 | } else { | ||
| 1054 | page_table[page] = BufferId{}; | ||
| 539 | } | 1055 | } |
| 540 | } | 1056 | } |
| 1057 | } | ||
| 541 | 1058 | ||
| 542 | bool IsRegionWritten(VAddr start, VAddr end) const { | 1059 | template <class P> |
| 543 | const u64 page_end = end >> WRITE_PAGE_BIT; | 1060 | void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 544 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { | 1061 | if (buffer.CpuAddr() == 0) { |
| 545 | if (written_pages.contains(page_start)) { | 1062 | return; |
| 546 | return true; | 1063 | } |
| 1064 | SynchronizeBufferImpl(buffer, cpu_addr, size); | ||
| 1065 | } | ||
| 1066 | |||
| 1067 | template <class P> | ||
| 1068 | void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { | ||
| 1069 | boost::container::small_vector<BufferCopy, 4> copies; | ||
| 1070 | u64 total_size_bytes = 0; | ||
| 1071 | u64 largest_copy = 0; | ||
| 1072 | buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 1073 | copies.push_back(BufferCopy{ | ||
| 1074 | .src_offset = total_size_bytes, | ||
| 1075 | .dst_offset = range_offset, | ||
| 1076 | .size = range_size, | ||
| 1077 | }); | ||
| 1078 | total_size_bytes += range_size; | ||
| 1079 | largest_copy = std::max(largest_copy, range_size); | ||
| 1080 | }); | ||
| 1081 | if (total_size_bytes == 0) { | ||
| 1082 | return; | ||
| 1083 | } | ||
| 1084 | const std::span<BufferCopy> copies_span(copies.data(), copies.size()); | ||
| 1085 | UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); | ||
| 1086 | } | ||
| 1087 | |||
| 1088 | template <class P> | ||
| 1089 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||
| 1090 | std::span<BufferCopy> copies) { | ||
| 1091 | if constexpr (USE_MEMORY_MAPS) { | ||
| 1092 | MappedUploadMemory(buffer, total_size_bytes, copies); | ||
| 1093 | } else { | ||
| 1094 | ImmediateUploadMemory(buffer, largest_copy, copies); | ||
| 1095 | } | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | template <class P> | ||
| 1099 | void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | ||
| 1100 | std::span<const BufferCopy> copies) { | ||
| 1101 | std::span<u8> immediate_buffer; | ||
| 1102 | for (const BufferCopy& copy : copies) { | ||
| 1103 | std::span<const u8> upload_span; | ||
| 1104 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | ||
| 1105 | if (IsRangeGranular(cpu_addr, copy.size)) { | ||
| 1106 | upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); | ||
| 1107 | } else { | ||
| 1108 | if (immediate_buffer.empty()) { | ||
| 1109 | immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 547 | } | 1110 | } |
| 1111 | cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | ||
| 1112 | upload_span = immediate_buffer.subspan(0, copy.size); | ||
| 548 | } | 1113 | } |
| 549 | return false; | 1114 | buffer.ImmediateUpload(copy.dst_offset, upload_span); |
| 550 | } | 1115 | } |
| 551 | 1116 | } | |
| 552 | void QueueDestruction(std::shared_ptr<Buffer> buffer) { | 1117 | |
| 553 | buffer->SetEpoch(epoch); | 1118 | template <class P> |
| 554 | pending_destruction.push(std::move(buffer)); | 1119 | void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, |
| 1120 | std::span<const BufferCopy> copies) { | ||
| 1121 | auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); | ||
| 1122 | const std::span<u8> staging_pointer = upload_staging.mapped_span; | ||
| 1123 | for (const BufferCopy& copy : copies) { | ||
| 1124 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | ||
| 1125 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; | ||
| 1126 | cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); | ||
| 555 | } | 1127 | } |
| 556 | 1128 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | |
| 557 | void MarkForAsyncFlush(MapInterval* map) { | 1129 | } |
| 558 | if (!uncommitted_flushes) { | 1130 | |
| 559 | uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>(); | 1131 | template <class P> |
| 1132 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | ||
| 1133 | const auto scalar_replace = [buffer_id](Binding& binding) { | ||
| 1134 | if (binding.buffer_id == buffer_id) { | ||
| 1135 | binding.buffer_id = BufferId{}; | ||
| 1136 | } | ||
| 1137 | }; | ||
| 1138 | const auto replace = [scalar_replace](std::span<Binding> bindings) { | ||
| 1139 | std::ranges::for_each(bindings, scalar_replace); | ||
| 1140 | }; | ||
| 1141 | scalar_replace(index_buffer); | ||
| 1142 | replace(vertex_buffers); | ||
| 1143 | std::ranges::for_each(uniform_buffers, replace); | ||
| 1144 | std::ranges::for_each(storage_buffers, replace); | ||
| 1145 | replace(transform_feedback_buffers); | ||
| 1146 | replace(compute_uniform_buffers); | ||
| 1147 | replace(compute_storage_buffers); | ||
| 1148 | std::erase(cached_write_buffer_ids, buffer_id); | ||
| 1149 | |||
| 1150 | // Mark the whole buffer as CPU written to stop tracking CPU writes | ||
| 1151 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 1152 | buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); | ||
| 1153 | |||
| 1154 | Unregister(buffer_id); | ||
| 1155 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); | ||
| 1156 | |||
| 1157 | NotifyBufferDeletion(); | ||
| 1158 | } | ||
| 1159 | |||
| 1160 | template <class P> | ||
| 1161 | void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) { | ||
| 1162 | const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) { | ||
| 1163 | std::ranges::replace(buffers, old_buffer_id, new_buffer_id); | ||
| 1164 | if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) { | ||
| 1165 | buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end()); | ||
| 560 | } | 1166 | } |
| 561 | uncommitted_flushes->insert(map); | 1167 | }; |
| 1168 | replace(uncommitted_downloads); | ||
| 1169 | std::ranges::for_each(committed_downloads, replace); | ||
| 1170 | } | ||
| 1171 | |||
| 1172 | template <class P> | ||
| 1173 | void BufferCache<P>::NotifyBufferDeletion() { | ||
| 1174 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | ||
| 1175 | dirty_uniform_buffers.fill(~u32{0}); | ||
| 562 | } | 1176 | } |
| 1177 | auto& flags = maxwell3d.dirty.flags; | ||
| 1178 | flags[Dirty::IndexBuffer] = true; | ||
| 1179 | flags[Dirty::VertexBuffers] = true; | ||
| 1180 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { | ||
| 1181 | flags[Dirty::VertexBuffer0 + index] = true; | ||
| 1182 | } | ||
| 1183 | has_deleted_buffers = true; | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | template <class P> | ||
| 1187 | typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const { | ||
| 1188 | const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr); | ||
| 1189 | const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8); | ||
| 1190 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 1191 | if (!cpu_addr || size == 0) { | ||
| 1192 | return NULL_BINDING; | ||
| 1193 | } | ||
| 1194 | const Binding binding{ | ||
| 1195 | .cpu_addr = *cpu_addr, | ||
| 1196 | .size = size, | ||
| 1197 | .buffer_id = BufferId{}, | ||
| 1198 | }; | ||
| 1199 | return binding; | ||
| 1200 | } | ||
| 1201 | |||
| 1202 | template <class P> | ||
| 1203 | std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { | ||
| 1204 | u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); | ||
| 1205 | if (IsRangeGranular(cpu_addr, size) || | ||
| 1206 | base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) { | ||
| 1207 | return std::span(base_pointer, size); | ||
| 1208 | } else { | ||
| 1209 | const std::span<u8> span = ImmediateBuffer(size); | ||
| 1210 | cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); | ||
| 1211 | return span; | ||
| 1212 | } | ||
| 1213 | } | ||
| 563 | 1214 | ||
| 564 | VideoCore::RasterizerInterface& rasterizer; | 1215 | template <class P> |
| 565 | Tegra::MemoryManager& gpu_memory; | 1216 | std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) { |
| 566 | Core::Memory::Memory& cpu_memory; | 1217 | if (wanted_capacity > immediate_buffer_capacity) { |
| 567 | StreamBuffer& stream_buffer; | 1218 | immediate_buffer_capacity = wanted_capacity; |
| 568 | 1219 | immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity); | |
| 569 | u8* buffer_ptr = nullptr; | 1220 | } |
| 570 | u64 buffer_offset = 0; | 1221 | return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity); |
| 571 | u64 buffer_offset_base = 0; | 1222 | } |
| 572 | 1223 | ||
| 573 | MapIntervalAllocator mapped_addresses_allocator; | 1224 | template <class P> |
| 574 | boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> | 1225 | bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept { |
| 575 | mapped_addresses; | 1226 | if constexpr (IS_OPENGL) { |
| 576 | 1227 | return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; | |
| 577 | std::unordered_map<u64, u32> written_pages; | 1228 | } else { |
| 578 | std::unordered_map<u64, std::shared_ptr<Buffer>> blocks; | 1229 | // Only OpenGL has fast uniform buffers |
| 579 | 1230 | return false; | |
| 580 | std::queue<std::shared_ptr<Buffer>> pending_destruction; | 1231 | } |
| 581 | u64 epoch = 0; | 1232 | } |
| 582 | u64 modified_ticks = 0; | ||
| 583 | |||
| 584 | std::vector<u8> staging_buffer; | ||
| 585 | |||
| 586 | std::list<MapInterval*> marked_for_unregister; | ||
| 587 | |||
| 588 | std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes; | ||
| 589 | std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes; | ||
| 590 | |||
| 591 | std::recursive_mutex mutex; | ||
| 592 | }; | ||
| 593 | 1233 | ||
| 594 | } // namespace VideoCommon | 1234 | } // namespace VideoCommon |
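Note on the rewritten cache above: FindBuffer and ChangeRegister replace the old interval map with a flat page table, where each page of CPU address space stores at most one BufferId and a miss (or an undersized hit) falls through to CreateBuffer. The sketch below is a stripped-down illustration of that lookup pattern only; PAGE_BITS, the table size, and all names here are assumptions chosen for brevity, not the cache's actual types.

#include <cstdint>
#include <vector>

using BufferId = std::uint32_t;                 // 0 doubles as "no buffer"
constexpr std::uint64_t PAGE_BITS = 16;         // illustrative 64 KiB pages
constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS;

struct PageTable {
    std::vector<BufferId> entries;              // one slot per page of address space

    explicit PageTable(std::uint64_t address_space_size)
        : entries(address_space_size >> PAGE_BITS, BufferId{0}) {}

    // FindBuffer-style lookup: a single array read per query.
    BufferId Lookup(std::uint64_t cpu_addr) const {
        return entries[cpu_addr >> PAGE_BITS];
    }

    // Register/Unregister-style update: every page the buffer touches points at it.
    void Assign(BufferId id, std::uint64_t cpu_addr, std::uint64_t size_bytes) {
        const std::uint64_t first = cpu_addr >> PAGE_BITS;
        const std::uint64_t last = (cpu_addr + size_bytes + PAGE_SIZE - 1) >> PAGE_BITS;
        for (std::uint64_t page = first; page < last; ++page) {
            entries[page] = id;                 // store BufferId{0} here to unregister
        }
    }
};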
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp deleted file mode 100644 index 62587e18a..000000000 --- a/src/video_core/buffer_cache/map_interval.cpp +++ /dev/null | |||
| @@ -1,33 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <memory> | ||
| 9 | |||
| 10 | #include "video_core/buffer_cache/map_interval.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | MapIntervalAllocator::MapIntervalAllocator() { | ||
| 15 | FillFreeList(first_chunk); | ||
| 16 | } | ||
| 17 | |||
| 18 | MapIntervalAllocator::~MapIntervalAllocator() = default; | ||
| 19 | |||
| 20 | void MapIntervalAllocator::AllocateNewChunk() { | ||
| 21 | *new_chunk = std::make_unique<Chunk>(); | ||
| 22 | FillFreeList(**new_chunk); | ||
| 23 | new_chunk = &(*new_chunk)->next; | ||
| 24 | } | ||
| 25 | |||
| 26 | void MapIntervalAllocator::FillFreeList(Chunk& chunk) { | ||
| 27 | const std::size_t old_size = free_list.size(); | ||
| 28 | free_list.resize(old_size + chunk.data.size()); | ||
| 29 | std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size, | ||
| 30 | [](MapInterval& interval) { return &interval; }); | ||
| 31 | } | ||
| 32 | |||
| 33 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h deleted file mode 100644 index ef974b08a..000000000 --- a/src/video_core/buffer_cache/map_interval.h +++ /dev/null | |||
| @@ -1,93 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <memory> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include <boost/intrusive/set_hook.hpp> | ||
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "video_core/gpu.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> { | ||
| 20 | MapInterval() = default; | ||
| 21 | |||
| 22 | /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {} | ||
| 23 | |||
| 24 | explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept | ||
| 25 | : start{start_}, end{end_}, gpu_addr{gpu_addr_} {} | ||
| 26 | |||
| 27 | bool IsInside(VAddr other_start, VAddr other_end) const noexcept { | ||
| 28 | return start <= other_start && other_end <= end; | ||
| 29 | } | ||
| 30 | |||
| 31 | bool Overlaps(VAddr other_start, VAddr other_end) const noexcept { | ||
| 32 | return start < other_end && other_start < end; | ||
| 33 | } | ||
| 34 | |||
| 35 | void MarkAsModified(bool is_modified_, u64 ticks_) noexcept { | ||
| 36 | is_modified = is_modified_; | ||
| 37 | ticks = ticks_; | ||
| 38 | } | ||
| 39 | |||
| 40 | boost::intrusive::set_member_hook<> member_hook_; | ||
| 41 | VAddr start = 0; | ||
| 42 | VAddr end = 0; | ||
| 43 | GPUVAddr gpu_addr = 0; | ||
| 44 | u64 ticks = 0; | ||
| 45 | bool is_written = false; | ||
| 46 | bool is_modified = false; | ||
| 47 | bool is_registered = false; | ||
| 48 | bool is_memory_marked = false; | ||
| 49 | bool is_sync_pending = false; | ||
| 50 | }; | ||
| 51 | |||
| 52 | struct MapIntervalCompare { | ||
| 53 | constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept { | ||
| 54 | return lhs.start < rhs.start; | ||
| 55 | } | ||
| 56 | }; | ||
| 57 | |||
| 58 | class MapIntervalAllocator { | ||
| 59 | public: | ||
| 60 | MapIntervalAllocator(); | ||
| 61 | ~MapIntervalAllocator(); | ||
| 62 | |||
| 63 | MapInterval* Allocate() { | ||
| 64 | if (free_list.empty()) { | ||
| 65 | AllocateNewChunk(); | ||
| 66 | } | ||
| 67 | MapInterval* const interval = free_list.back(); | ||
| 68 | free_list.pop_back(); | ||
| 69 | return interval; | ||
| 70 | } | ||
| 71 | |||
| 72 | void Release(MapInterval* interval) { | ||
| 73 | free_list.push_back(interval); | ||
| 74 | } | ||
| 75 | |||
| 76 | private: | ||
| 77 | struct Chunk { | ||
| 78 | std::unique_ptr<Chunk> next; | ||
| 79 | std::array<MapInterval, 0x8000> data; | ||
| 80 | }; | ||
| 81 | |||
| 82 | void AllocateNewChunk(); | ||
| 83 | |||
| 84 | void FillFreeList(Chunk& chunk); | ||
| 85 | |||
| 86 | std::vector<MapInterval*> free_list; | ||
| 87 | |||
| 88 | Chunk first_chunk; | ||
| 89 | |||
| 90 | std::unique_ptr<Chunk>* new_chunk = &first_chunk.next; | ||
| 91 | }; | ||
| 92 | |||
| 93 | } // namespace VideoCommon | ||
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 55e632346..2b7569335 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp | |||
| @@ -110,12 +110,10 @@ void Vic::Execute() { | |||
| 110 | converted_frame_buffer.get(), block_height, 0, 0); | 110 | converted_frame_buffer.get(), block_height, 0, 0); |
| 111 | 111 | ||
| 112 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); | 112 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); |
| 113 | gpu.Maxwell3D().OnMemoryWrite(); | ||
| 114 | } else { | 113 | } else { |
| 115 | // send pitch linear frame | 114 | // send pitch linear frame |
| 116 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, | 115 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, |
| 117 | linear_size); | 116 | linear_size); |
| 118 | gpu.Maxwell3D().OnMemoryWrite(); | ||
| 119 | } | 117 | } |
| 120 | break; | 118 | break; |
| 121 | } | 119 | } |
| @@ -163,7 +161,6 @@ void Vic::Execute() { | |||
| 163 | } | 161 | } |
| 164 | gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(), | 162 | gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(), |
| 165 | chroma_buffer.size()); | 163 | chroma_buffer.size()); |
| 166 | gpu.Maxwell3D().OnMemoryWrite(); | ||
| 167 | break; | 164 | break; |
| 168 | } | 165 | } |
| 169 | default: | 166 | default: |
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index b1eaac00c..7149af290 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp | |||
| @@ -12,13 +12,30 @@ | |||
| 12 | #define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32))) | 12 | #define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32))) |
| 13 | 13 | ||
| 14 | namespace VideoCommon::Dirty { | 14 | namespace VideoCommon::Dirty { |
| 15 | 15 | namespace { | |
| 16 | using Tegra::Engines::Maxwell3D; | 16 | using Tegra::Engines::Maxwell3D; |
| 17 | 17 | ||
| 18 | void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { | 18 | void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) { |
| 19 | static constexpr std::size_t num_array = 3; | ||
| 20 | for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) { | ||
| 21 | const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); | ||
| 22 | const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]); | ||
| 23 | |||
| 24 | FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers); | ||
| 25 | FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers); | ||
| 26 | } | ||
| 27 | } | ||
| 28 | |||
| 29 | void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) { | ||
| 30 | FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer); | ||
| 31 | } | ||
| 32 | |||
| 33 | void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) { | ||
| 19 | FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); | 34 | FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); |
| 20 | FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); | 35 | FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); |
| 36 | } | ||
| 21 | 37 | ||
| 38 | void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) { | ||
| 22 | static constexpr std::size_t num_per_rt = NUM(rt[0]); | 39 | static constexpr std::size_t num_per_rt = NUM(rt[0]); |
| 23 | static constexpr std::size_t begin = OFF(rt); | 40 | static constexpr std::size_t begin = OFF(rt); |
| 24 | static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; | 41 | static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; |
| @@ -41,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl | |||
| 41 | FillBlock(table, OFF(zeta), NUM(zeta), flag); | 58 | FillBlock(table, OFF(zeta), NUM(zeta), flag); |
| 42 | } | 59 | } |
| 43 | } | 60 | } |
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { | ||
| 64 | SetupDirtyVertexBuffers(tables); | ||
| 65 | SetupIndexBuffer(tables); | ||
| 66 | SetupDirtyDescriptors(tables); | ||
| 67 | SetupDirtyRenderTargets(tables); | ||
| 68 | } | ||
| 44 | 69 | ||
| 45 | } // namespace VideoCommon::Dirty | 70 | } // namespace VideoCommon::Dirty |
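Background for the FillBlock calls in SetupDirtyVertexBuffers and SetupIndexBuffer above: each dirty table is conceptually an array with one entry per Maxwell register, and a register write raises the flag stored in that register's slot, so marking a whole register range dirty is just a fill over that range. The sketch below illustrates the mechanism with made-up sizes and names; it is not the engine's real table layout.

#include <algorithm>
#include <array>
#include <bitset>
#include <cstddef>
#include <cstdint>

constexpr std::size_t NUM_REGISTERS = 0x1000;   // illustrative register-file size
constexpr std::size_t NUM_FLAGS = 256;

using Table = std::array<std::uint8_t, NUM_REGISTERS>;

// Same idea as FillBlock: map a contiguous register range to one dirty flag.
void FillBlock(Table& table, std::size_t begin, std::size_t num, std::uint8_t flag) {
    std::fill_n(table.begin() + begin, num, flag);
}

struct DirtyState {
    Table table{};                              // register index -> dirty flag id
    std::bitset<NUM_FLAGS> flags;               // raised flags, consumed by the caches

    void OnRegisterWrite(std::size_t reg) {
        flags[table[reg]] = true;               // writing any mapped register dirties its flag
    }
};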
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 875527ddd..702688ace 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h | |||
| @@ -30,6 +30,12 @@ enum : u8 { | |||
| 30 | ColorBuffer7, | 30 | ColorBuffer7, |
| 31 | ZetaBuffer, | 31 | ZetaBuffer, |
| 32 | 32 | ||
| 33 | VertexBuffers, | ||
| 34 | VertexBuffer0, | ||
| 35 | VertexBuffer31 = VertexBuffer0 + 31, | ||
| 36 | |||
| 37 | IndexBuffer, | ||
| 38 | |||
| 33 | LastCommonEntry, | 39 | LastCommonEntry, |
| 34 | }; | 40 | }; |
| 35 | 41 | ||
| @@ -47,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_ | |||
| 47 | FillBlock(tables[1], begin, num, index_b); | 53 | FillBlock(tables[1], begin, num, index_b); |
| 48 | } | 54 | } |
| 49 | 55 | ||
| 50 | void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); | 56 | void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); |
| 51 | 57 | ||
| 52 | } // namespace VideoCommon::Dirty | 58 | } // namespace VideoCommon::Dirty |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 2c8b20024..8b33c04ab 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -23,8 +23,6 @@ void DmaPusher::DispatchCalls() { | |||
| 23 | MICROPROFILE_SCOPE(DispatchCalls); | 23 | MICROPROFILE_SCOPE(DispatchCalls); |
| 24 | 24 | ||
| 25 | gpu.SyncGuestHost(); | 25 | gpu.SyncGuestHost(); |
| 26 | // On entering GPU code, assume all memory may be touched by the ARM core. | ||
| 27 | gpu.Maxwell3D().OnMemoryWrite(); | ||
| 28 | 26 | ||
| 29 | dma_pushbuffer_subindex = 0; | 27 | dma_pushbuffer_subindex = 0; |
| 30 | 28 | ||
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index ed29fc7ac..a9b75091e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal | |||
| 39 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { | 39 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { |
| 40 | upload_state.ProcessData(method_argument, is_last_call); | 40 | upload_state.ProcessData(method_argument, is_last_call); |
| 41 | if (is_last_call) { | 41 | if (is_last_call) { |
| 42 | system.GPU().Maxwell3D().OnMemoryWrite(); | ||
| 43 | } | 42 | } |
| 44 | break; | 43 | break; |
| 45 | } | 44 | } |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 9911140e9..560551157 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call | |||
| 33 | case KEPLERMEMORY_REG_INDEX(data): { | 33 | case KEPLERMEMORY_REG_INDEX(data): { |
| 34 | upload_state.ProcessData(method_argument, is_last_call); | 34 | upload_state.ProcessData(method_argument, is_last_call); |
| 35 | if (is_last_call) { | 35 | if (is_last_call) { |
| 36 | system.GPU().Maxwell3D().OnMemoryWrite(); | ||
| 37 | } | 36 | } |
| 38 | break; | 37 | break; |
| 39 | } | 38 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index d6ba9da5c..75517a4f7 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -223,7 +223,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume | |||
| 223 | case MAXWELL3D_REG_INDEX(data_upload): | 223 | case MAXWELL3D_REG_INDEX(data_upload): |
| 224 | upload_state.ProcessData(argument, is_last_call); | 224 | upload_state.ProcessData(argument, is_last_call); |
| 225 | if (is_last_call) { | 225 | if (is_last_call) { |
| 226 | OnMemoryWrite(); | ||
| 227 | } | 226 | } |
| 228 | return; | 227 | return; |
| 229 | case MAXWELL3D_REG_INDEX(fragment_barrier): | 228 | case MAXWELL3D_REG_INDEX(fragment_barrier): |
| @@ -570,17 +569,18 @@ std::optional<u64> Maxwell3D::GetQueryResult() { | |||
| 570 | } | 569 | } |
| 571 | } | 570 | } |
| 572 | 571 | ||
| 573 | void Maxwell3D::ProcessCBBind(std::size_t stage_index) { | 572 | void Maxwell3D::ProcessCBBind(size_t stage_index) { |
| 574 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. | 573 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. |
| 575 | auto& shader = state.shader_stages[stage_index]; | 574 | const auto& bind_data = regs.cb_bind[stage_index]; |
| 576 | auto& bind_data = regs.cb_bind[stage_index]; | 575 | auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index]; |
| 577 | |||
| 578 | ASSERT(bind_data.index < Regs::MaxConstBuffers); | ||
| 579 | auto& buffer = shader.const_buffers[bind_data.index]; | ||
| 580 | |||
| 581 | buffer.enabled = bind_data.valid.Value() != 0; | 576 | buffer.enabled = bind_data.valid.Value() != 0; |
| 582 | buffer.address = regs.const_buffer.BufferAddress(); | 577 | buffer.address = regs.const_buffer.BufferAddress(); |
| 583 | buffer.size = regs.const_buffer.cb_size; | 578 | buffer.size = regs.const_buffer.cb_size; |
| 579 | |||
| 580 | const bool is_enabled = bind_data.valid.Value() != 0; | ||
| 581 | const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0; | ||
| 582 | const u32 size = is_enabled ? regs.const_buffer.cb_size : 0; | ||
| 583 | rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size); | ||
| 584 | } | 584 | } |
| 585 | 585 | ||
| 586 | void Maxwell3D::ProcessCBData(u32 value) { | 586 | void Maxwell3D::ProcessCBData(u32 value) { |
| @@ -635,7 +635,6 @@ void Maxwell3D::FinishCBData() { | |||
| 635 | 635 | ||
| 636 | const u32 id = cb_data_state.id; | 636 | const u32 id = cb_data_state.id; |
| 637 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); | 637 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); |
| 638 | OnMemoryWrite(); | ||
| 639 | 638 | ||
| 640 | cb_data_state.id = null_cb_data; | 639 | cb_data_state.id = null_cb_data; |
| 641 | cb_data_state.current = null_cb_data; | 640 | cb_data_state.current = null_cb_data; |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cc94d2678..ffed42a29 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1314,8 +1314,7 @@ public: | |||
| 1314 | 1314 | ||
| 1315 | GPUVAddr LimitAddress() const { | 1315 | GPUVAddr LimitAddress() const { |
| 1316 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) | | 1316 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) | |
| 1317 | limit_low) + | 1317 | limit_low); |
| 1318 | 1; | ||
| 1319 | } | 1318 | } |
| 1320 | } vertex_array_limit[NumVertexArrays]; | 1319 | } vertex_array_limit[NumVertexArrays]; |
| 1321 | 1320 | ||
| @@ -1403,6 +1402,7 @@ public: | |||
| 1403 | }; | 1402 | }; |
| 1404 | 1403 | ||
| 1405 | std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; | 1404 | std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; |
| 1405 | |||
| 1406 | u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering. | 1406 | u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering. |
| 1407 | }; | 1407 | }; |
| 1408 | 1408 | ||
| @@ -1452,11 +1452,6 @@ public: | |||
| 1452 | return *rasterizer; | 1452 | return *rasterizer; |
| 1453 | } | 1453 | } |
| 1454 | 1454 | ||
| 1455 | /// Notify a memory write has happened. | ||
| 1456 | void OnMemoryWrite() { | ||
| 1457 | dirty.flags |= dirty.on_write_stores; | ||
| 1458 | } | ||
| 1459 | |||
| 1460 | enum class MMEDrawMode : u32 { | 1455 | enum class MMEDrawMode : u32 { |
| 1461 | Undefined, | 1456 | Undefined, |
| 1462 | Array, | 1457 | Array, |
| @@ -1478,7 +1473,6 @@ public: | |||
| 1478 | using Tables = std::array<Table, 2>; | 1473 | using Tables = std::array<Table, 2>; |
| 1479 | 1474 | ||
| 1480 | Flags flags; | 1475 | Flags flags; |
| 1481 | Flags on_write_stores; | ||
| 1482 | Tables tables{}; | 1476 | Tables tables{}; |
| 1483 | } dirty; | 1477 | } dirty; |
| 1484 | 1478 | ||
| @@ -1541,7 +1535,7 @@ private: | |||
| 1541 | void FinishCBData(); | 1535 | void FinishCBData(); |
| 1542 | 1536 | ||
| 1543 | /// Handles a write to the CB_BIND register. | 1537 | /// Handles a write to the CB_BIND register. |
| 1544 | void ProcessCBBind(std::size_t stage_index); | 1538 | void ProcessCBBind(size_t stage_index); |
| 1545 | 1539 | ||
| 1546 | /// Handles a write to the VERTEX_END_GL register, triggering a draw. | 1540 | /// Handles a write to the VERTEX_END_GL register, triggering a draw. |
| 1547 | void DrawArrays(); | 1541 | void DrawArrays(); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index ba750748c..a2f19559f 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -60,9 +60,6 @@ void MaxwellDMA::Launch() { | |||
| 60 | return; | 60 | return; |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | // All copies here update the main memory, so mark all rasterizer states as invalid. | ||
| 64 | system.GPU().Maxwell3D().OnMemoryWrite(); | ||
| 65 | |||
| 66 | if (is_src_pitch && is_dst_pitch) { | 63 | if (is_src_pitch && is_dst_pitch) { |
| 67 | CopyPitchToPitch(); | 64 | CopyPitchToPitch(); |
| 68 | } else { | 65 | } else { |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 3512283ff..f055b61e9 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -143,22 +143,26 @@ private: | |||
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | bool ShouldWait() const { | 145 | bool ShouldWait() const { |
| 146 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 146 | return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || | 147 | return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || |
| 147 | query_cache.ShouldWaitAsyncFlushes(); | 148 | query_cache.ShouldWaitAsyncFlushes(); |
| 148 | } | 149 | } |
| 149 | 150 | ||
| 150 | bool ShouldFlush() const { | 151 | bool ShouldFlush() const { |
| 152 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 151 | return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() || | 153 | return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() || |
| 152 | query_cache.HasUncommittedFlushes(); | 154 | query_cache.HasUncommittedFlushes(); |
| 153 | } | 155 | } |
| 154 | 156 | ||
| 155 | void PopAsyncFlushes() { | 157 | void PopAsyncFlushes() { |
| 158 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 156 | texture_cache.PopAsyncFlushes(); | 159 | texture_cache.PopAsyncFlushes(); |
| 157 | buffer_cache.PopAsyncFlushes(); | 160 | buffer_cache.PopAsyncFlushes(); |
| 158 | query_cache.PopAsyncFlushes(); | 161 | query_cache.PopAsyncFlushes(); |
| 159 | } | 162 | } |
| 160 | 163 | ||
| 161 | void CommitAsyncFlushes() { | 164 | void CommitAsyncFlushes() { |
| 165 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 162 | texture_cache.CommitAsyncFlushes(); | 166 | texture_cache.CommitAsyncFlushes(); |
| 163 | buffer_cache.CommitAsyncFlushes(); | 167 | buffer_cache.CommitAsyncFlushes(); |
| 164 | query_cache.CommitAsyncFlushes(); | 168 | query_cache.CommitAsyncFlushes(); |
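The fence_manager.h hunks above wrap each query in a std::scoped_lock over both cache mutexes now that the buffer cache carries its own lock; scoped_lock acquires all of its mutexes with deadlock avoidance, so the textual acquisition order does not matter. A minimal, self-contained illustration follows; the two cache types here are placeholders, not yuzu classes.

#include <mutex>

struct BufferCacheLike {
    std::recursive_mutex mutex;
    bool HasUncommittedFlushes() const { return false; }
};

struct TextureCacheLike {
    std::mutex mutex;
    bool HasUncommittedFlushes() const { return false; }
};

bool ShouldFlush(BufferCacheLike& buffer_cache, TextureCacheLike& texture_cache) {
    // Both mutexes are locked together; another thread taking them in the
    // opposite order through scoped_lock cannot deadlock against this one.
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    return buffer_cache.HasUncommittedFlushes() || texture_cache.HasUncommittedFlushes();
}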
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 28f2b8614..970120acc 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -12,7 +12,6 @@ set(SHADER_FILES | |||
| 12 | vulkan_blit_depth_stencil.frag | 12 | vulkan_blit_depth_stencil.frag |
| 13 | vulkan_present.frag | 13 | vulkan_present.frag |
| 14 | vulkan_present.vert | 14 | vulkan_present.vert |
| 15 | vulkan_quad_array.comp | ||
| 16 | vulkan_quad_indexed.comp | 15 | vulkan_quad_indexed.comp |
| 17 | vulkan_uint8.comp | 16 | vulkan_uint8.comp |
| 18 | ) | 17 | ) |
diff --git a/src/video_core/host_shaders/vulkan_quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp deleted file mode 100644 index 212f4e998..000000000 --- a/src/video_core/host_shaders/vulkan_quad_array.comp +++ /dev/null | |||
| @@ -1,28 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 460 core | ||
| 6 | |||
| 7 | layout (local_size_x = 1024) in; | ||
| 8 | |||
| 9 | layout (std430, set = 0, binding = 0) buffer OutputBuffer { | ||
| 10 | uint output_indexes[]; | ||
| 11 | }; | ||
| 12 | |||
| 13 | layout (push_constant) uniform PushConstants { | ||
| 14 | uint first; | ||
| 15 | }; | ||
| 16 | |||
| 17 | void main() { | ||
| 18 | uint primitive = gl_GlobalInvocationID.x; | ||
| 19 | if (primitive * 6 >= output_indexes.length()) { | ||
| 20 | return; | ||
| 21 | } | ||
| 22 | |||
| 23 | const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); | ||
| 24 | for (uint vertex = 0; vertex < 6; ++vertex) { | ||
| 25 | uint index = first + primitive * 4 + quad_map[vertex]; | ||
| 26 | output_indexes[primitive * 6 + vertex] = index; | ||
| 27 | } | ||
| 28 | } | ||
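The deleted vulkan_quad_array.comp expanded quad draws into a triangle-list index buffer on the GPU, six indices per quad via the fixed {0, 1, 2, 0, 2, 3} map. For reference, the same arithmetic expressed as a host-side helper; this only illustrates the transform and is not the code path the commit replaces the shader with.

#include <array>
#include <cstdint>
#include <vector>

// Expands `num_quads` quads starting at vertex `first` into triangle-list indices.
std::vector<std::uint32_t> MakeQuadIndices(std::uint32_t first, std::uint32_t num_quads) {
    static constexpr std::array<std::uint32_t, 6> quad_map{0, 1, 2, 0, 2, 3};
    std::vector<std::uint32_t> indices;
    indices.reserve(std::size_t{num_quads} * 6);
    for (std::uint32_t quad = 0; quad < num_quads; ++quad) {
        for (const std::uint32_t vertex : quad_map) {
            indices.push_back(first + quad * 4 + vertex);
        }
    }
    return indices;
}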
diff --git a/src/video_core/host_shaders/vulkan_uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp index ad74d7af9..872291670 100644 --- a/src/video_core/host_shaders/vulkan_uint8.comp +++ b/src/video_core/host_shaders/vulkan_uint8.comp | |||
| @@ -16,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { | |||
| 16 | uint16_t output_indexes[]; | 16 | uint16_t output_indexes[]; |
| 17 | }; | 17 | }; |
| 18 | 18 | ||
| 19 | uint AssembleIndex(uint id) { | ||
| 20 | // Most primitive restart indices are 0xFF | ||
| 21 | // Hardcode this to 0xFF for now | ||
| 22 | uint index = uint(input_indexes[id]); | ||
| 23 | return index == 0xFF ? 0xFFFF : index; | ||
| 24 | } | ||
| 25 | |||
| 19 | void main() { | 26 | void main() { |
| 20 | uint id = gl_GlobalInvocationID.x; | 27 | uint id = gl_GlobalInvocationID.x; |
| 21 | if (id < input_indexes.length()) { | 28 | if (id < input_indexes.length()) { |
| 22 | output_indexes[id] = uint16_t(input_indexes[id]); | 29 | output_indexes[id] = uint16_t(AssembleIndex(id)); |
| 23 | } | 30 | } |
| 24 | } | 31 | } |
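The vulkan_uint8.comp change above makes the uint8-to-uint16 index conversion preserve primitive restart: the 8-bit restart value 0xFF must widen to the 16-bit restart value 0xFFFF rather than the ordinary index 0x00FF, which is what the hard-coded comparison in AssembleIndex does. An equivalent host-side sketch of that conversion, for clarity only; the commit keeps this work in the compute shader.

#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::uint16_t> ConvertUint8Indices(const std::vector<std::uint8_t>& input) {
    std::vector<std::uint16_t> output(input.size());
    for (std::size_t i = 0; i < input.size(); ++i) {
        // Widen the 8-bit primitive restart index to its 16-bit counterpart.
        output[i] = input[i] == 0xFF ? std::uint16_t{0xFFFF} : std::uint16_t{input[i]};
    }
    return output;
}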
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 0cb0f387d..50491b758 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <atomic> | 7 | #include <atomic> |
| 8 | #include <functional> | 8 | #include <functional> |
| 9 | #include <optional> | 9 | #include <optional> |
| 10 | #include <span> | ||
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 11 | #include "video_core/engines/fermi_2d.h" | 12 | #include "video_core/engines/fermi_2d.h" |
| 12 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| @@ -49,6 +50,10 @@ public: | |||
| 49 | /// Records a GPU query and caches it | 50 | /// Records a GPU query and caches it |
| 50 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; | 51 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; |
| 51 | 52 | ||
| 53 | /// Signal a uniform buffer binding | ||
| 54 | virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||
| 55 | u32 size) = 0; | ||
| 56 | |||
| 52 | /// Signal a GPU based semaphore as a fence | 57 | /// Signal a GPU based semaphore as a fence |
| 53 | virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; | 58 | virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; |
| 54 | 59 | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 5772cad87..889ad6c56 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -2,98 +2,235 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <memory> | 5 | #include <span> |
| 6 | 6 | ||
| 7 | #include <glad/glad.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/microprofile.h" | ||
| 11 | #include "video_core/buffer_cache/buffer_cache.h" | 7 | #include "video_core/buffer_cache/buffer_cache.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/rasterizer_interface.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 8 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 15 | #include "video_core/renderer_opengl/gl_device.h" | 9 | #include "video_core/renderer_opengl/gl_device.h" |
| 16 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 10 | #include "video_core/vulkan_common/vulkan_device.h" |
| 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 11 | #include "video_core/vulkan_common/vulkan_instance.h" |
| 12 | #include "video_core/vulkan_common/vulkan_library.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 18 | 14 | ||
| 19 | namespace OpenGL { | 15 | namespace OpenGL { |
| 16 | namespace { | ||
| 17 | struct BindlessSSBO { | ||
| 18 | GLuint64EXT address; | ||
| 19 | GLsizei length; | ||
| 20 | GLsizei padding; | ||
| 21 | }; | ||
| 22 | static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4); | ||
| 23 | |||
| 24 | constexpr std::array PROGRAM_LUT{ | ||
| 25 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, | ||
| 26 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | ||
| 27 | }; | ||
| 28 | } // Anonymous namespace | ||
| 29 | |||
| 30 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | ||
| 31 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | ||
| 32 | |||
| 33 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | ||
| 34 | VAddr cpu_addr_, u64 size_bytes_) | ||
| 35 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | ||
| 36 | buffer.Create(); | ||
| 37 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); | ||
| 38 | glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); | ||
| 39 | if (runtime.device.UseAssemblyShaders()) { | ||
| 40 | CreateMemoryObjects(runtime); | ||
| 41 | glNamedBufferStorageMemEXT(buffer.handle, SizeBytes(), memory_commit.ExportOpenGLHandle(), | ||
| 42 | memory_commit.Offset()); | ||
| 43 | } else { | ||
| 44 | glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); | ||
| 45 | } | ||
| 46 | if (runtime.has_unified_vertex_buffers) { | ||
| 47 | glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); | ||
| 48 | } | ||
| 49 | } | ||
| 20 | 50 | ||
| 21 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 51 | void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept { |
| 52 | glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||
| 53 | static_cast<GLsizeiptr>(data.size_bytes()), data.data()); | ||
| 54 | } | ||
| 22 | 55 | ||
| 23 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | 56 | void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept { |
| 57 | glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||
| 58 | static_cast<GLsizeiptr>(data.size_bytes()), data.data()); | ||
| 59 | } | ||
| 24 | 60 | ||
| 25 | Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_) | 61 | void Buffer::MakeResident(GLenum access) noexcept { |
| 26 | : BufferBlock{cpu_addr_, size_} { | 62 | // Abuse GLenum's order to exit early |
| 27 | gl_buffer.Create(); | 63 | // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE |
| 28 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW); | 64 | if (access <= current_residency_access || buffer.handle == 0) { |
| 29 | if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) { | 65 | return; |
| 30 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); | 66 | } |
| 31 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); | 67 | if (std::exchange(current_residency_access, access) != GL_NONE) { |
| 68 | // If the buffer is already resident, remove its residency before promoting it | ||
| 69 | glMakeNamedBufferNonResidentNV(buffer.handle); | ||
| 32 | } | 70 | } |
| 71 | glMakeNamedBufferResidentNV(buffer.handle, access); | ||
| 33 | } | 72 | } |
| 34 | 73 | ||
| 35 | Buffer::~Buffer() = default; | 74 | GLuint Buffer::SubBuffer(u32 offset) { |
| 75 | if (offset == 0) { | ||
| 76 | return buffer.handle; | ||
| 77 | } | ||
| 78 | for (const auto& [sub_buffer, sub_offset] : subs) { | ||
| 79 | if (sub_offset == offset) { | ||
| 80 | return sub_buffer.handle; | ||
| 81 | } | ||
| 82 | } | ||
| 83 | OGLBuffer sub_buffer; | ||
| 84 | sub_buffer.Create(); | ||
| 85 | glNamedBufferStorageMemEXT(sub_buffer.handle, SizeBytes() - offset, | ||
| 86 | memory_commit.ExportOpenGLHandle(), memory_commit.Offset() + offset); | ||
| 87 | return subs.emplace_back(std::move(sub_buffer), offset).first.handle; | ||
| 88 | } | ||
| 36 | 89 | ||
| 37 | void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { | 90 | void Buffer::CreateMemoryObjects(BufferCacheRuntime& runtime) { |
| 38 | glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), | 91 | auto& allocator = runtime.vulkan_memory_allocator; |
| 39 | static_cast<GLsizeiptr>(data_size), data); | 92 | auto& device = runtime.vulkan_device->GetLogical(); |
| 93 | auto vulkan_buffer = device.CreateBuffer(VkBufferCreateInfo{ | ||
| 94 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 95 | .pNext = nullptr, | ||
| 96 | .flags = 0, | ||
| 97 | .size = SizeBytes(), | ||
| 98 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||
| 99 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | ||
| 100 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||
| 101 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | ||
| 102 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, | ||
| 103 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 104 | .queueFamilyIndexCount = 0, | ||
| 105 | .pQueueFamilyIndices = nullptr, | ||
| 106 | }); | ||
| 107 | const VkMemoryRequirements requirements = device.GetBufferMemoryRequirements(*vulkan_buffer); | ||
| 108 | memory_commit = allocator->Commit(requirements, Vulkan::MemoryUsage::DeviceLocal); | ||
| 40 | } | 109 | } |
| 41 | 110 | ||
| 42 | void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { | 111 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_, |
| 43 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | 112 | Vulkan::MemoryAllocator* vulkan_memory_allocator_) |
| 44 | const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size); | 113 | : device{device_}, vulkan_device{vulkan_device_}, |
| 45 | const GLintptr gl_offset = static_cast<GLintptr>(offset); | 114 | vulkan_memory_allocator{vulkan_memory_allocator_}, |
| 46 | if (read_buffer.handle == 0) { | 115 | stream_buffer{device.HasFastBufferSubData() ? std::nullopt |
| 47 | read_buffer.Create(); | 116 | : std::make_optional<StreamBuffer>()} { |
| 48 | glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr, | 117 | GLint gl_max_attributes; |
| 49 | GL_STREAM_READ); | 118 | glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); |
| 119 | max_attributes = static_cast<u32>(gl_max_attributes); | ||
| 120 | use_assembly_shaders = device.UseAssemblyShaders(); | ||
| 121 | has_unified_vertex_buffers = device.HasVertexBufferUnifiedMemory(); | ||
| 122 | |||
| 123 | for (auto& stage_uniforms : fast_uniforms) { | ||
| 124 | for (OGLBuffer& buffer : stage_uniforms) { | ||
| 125 | buffer.Create(); | ||
| 126 | glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); | ||
| 127 | } | ||
| 50 | } | 128 | } |
| 51 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||
| 52 | glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size); | ||
| 53 | glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data); | ||
| 54 | } | 129 | } |
| 55 | 130 | ||
| 56 | void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | 131 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, |
| 57 | std::size_t copy_size) { | 132 | std::span<const VideoCommon::BufferCopy> copies) { |
| 58 | glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), | 133 | for (const VideoCommon::BufferCopy& copy : copies) { |
| 59 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size)); | 134 | glCopyNamedBufferSubData( |
| 135 | src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset), | ||
| 136 | static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size)); | ||
| 137 | } | ||
| 60 | } | 138 | } |
| 61 | 139 | ||
| 62 | OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 140 | void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { |
| 63 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 141 | if (has_unified_vertex_buffers) { |
| 64 | const Device& device_, OGLStreamBuffer& stream_buffer_, | 142 | buffer.MakeResident(GL_READ_ONLY); |
| 65 | StateTracker& state_tracker) | 143 | glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, |
| 66 | : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} { | 144 | static_cast<GLsizeiptr>(size)); |
| 67 | if (!device.HasFastBufferSubData()) { | 145 | } else { |
| 68 | return; | 146 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); |
| 147 | index_buffer_offset = offset; | ||
| 69 | } | 148 | } |
| 149 | } | ||
| 70 | 150 | ||
| 71 | static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); | 151 | void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, |
| 72 | glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | 152 | u32 stride) { |
| 73 | for (const GLuint cbuf : cbufs) { | 153 | if (index >= max_attributes) { |
| 74 | glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); | 154 | return; |
| 155 | } | ||
| 156 | if (has_unified_vertex_buffers) { | ||
| 157 | buffer.MakeResident(GL_READ_ONLY); | ||
| 158 | glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride)); | ||
| 159 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index, | ||
| 160 | buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size)); | ||
| 161 | } else { | ||
| 162 | glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), | ||
| 163 | static_cast<GLsizei>(stride)); | ||
| 75 | } | 164 | } |
| 76 | } | 165 | } |
| 77 | 166 | ||
| 78 | OGLBufferCache::~OGLBufferCache() { | 167 | void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, |
| 79 | glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | 168 | u32 offset, u32 size) { |
| 169 | if (use_assembly_shaders) { | ||
| 170 | const GLuint sub_buffer = buffer.SubBuffer(offset); | ||
| 171 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, sub_buffer, 0, | ||
| 172 | static_cast<GLsizeiptr>(size)); | ||
| 173 | } else { | ||
| 174 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||
| 175 | const GLuint binding = base_binding + binding_index; | ||
| 176 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), | ||
| 177 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 178 | } | ||
| 80 | } | 179 | } |
| 81 | 180 | ||
| 82 | std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | 181 | void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, |
| 83 | return std::make_shared<Buffer>(device, cpu_addr, size); | 182 | u32 size) { |
| 183 | if (use_assembly_shaders) { | ||
| 184 | glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, | ||
| 185 | buffer.SubBuffer(offset), 0, static_cast<GLsizeiptr>(size)); | ||
| 186 | } else { | ||
| 187 | glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(), | ||
| 188 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 189 | } | ||
| 84 | } | 190 | } |
| 85 | 191 | ||
| 86 | OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { | 192 | void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, |
| 87 | return {0, 0, 0}; | 193 | u32 offset, u32 size, bool is_written) { |
| 194 | if (use_assembly_shaders) { | ||
| 195 | const BindlessSSBO ssbo{ | ||
| 196 | .address = buffer.HostGpuAddr() + offset, | ||
| 197 | .length = static_cast<GLsizei>(size), | ||
| 198 | .padding = 0, | ||
| 199 | }; | ||
| 200 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | ||
| 201 | glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, | ||
| 202 | reinterpret_cast<const GLuint*>(&ssbo)); | ||
| 203 | } else { | ||
| 204 | const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 205 | const GLuint binding = base_binding + binding_index; | ||
| 206 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), | ||
| 207 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 208 | } | ||
| 88 | } | 209 | } |
| 89 | 210 | ||
| 90 | OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, | 211 | void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, |
| 91 | std::size_t size) { | 212 | u32 size, bool is_written) { |
| 92 | DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); | 213 | if (use_assembly_shaders) { |
| 93 | const GLuint cbuf = cbufs[cbuf_cursor++]; | 214 | const BindlessSSBO ssbo{ |
| 215 | .address = buffer.HostGpuAddr() + offset, | ||
| 216 | .length = static_cast<GLsizei>(size), | ||
| 217 | .padding = 0, | ||
| 218 | }; | ||
| 219 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | ||
| 220 | glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, | ||
| 221 | reinterpret_cast<const GLuint*>(&ssbo)); | ||
| 222 | } else if (size == 0) { | ||
| 223 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); | ||
| 224 | } else { | ||
| 225 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), | ||
| 226 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 227 | } | ||
| 228 | } | ||
| 94 | 229 | ||
| 95 | glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); | 230 | void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, |
| 96 | return {cbuf, 0, 0}; | 231 | u32 size) { |
| 232 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(), | ||
| 233 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 97 | } | 234 | } |
| 98 | 235 | ||
| 99 | } // namespace OpenGL | 236 | } // namespace OpenGL |
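
Editor's note (not part of the commit): the Buffer::MakeResident change in this file leans on the numeric ordering of the GL access enums to exit early. The standalone sketch below only illustrates that comparison; the GL_*_ constants are local stand-ins with the same values as the real glad definitions (GL_NONE = 0, GL_READ_ONLY = 0x88B8, GL_READ_WRITE = 0x88BA).

#include <cstdio>

constexpr unsigned GL_NONE_ = 0;            // no residency requested yet
constexpr unsigned GL_READ_ONLY_ = 0x88B8;  // GL_READ_ONLY
constexpr unsigned GL_READ_WRITE_ = 0x88BA; // GL_READ_WRITE

// GL_NONE < GL_READ_ONLY < GL_READ_WRITE holds numerically, so a plain "<=" compare is
// enough to decide whether the requested access is already satisfied.
static_assert(GL_NONE_ < GL_READ_ONLY_ && GL_READ_ONLY_ < GL_READ_WRITE_);

int main() {
    unsigned current = GL_NONE_;
    for (unsigned requested : {GL_READ_ONLY_, GL_READ_ONLY_, GL_READ_WRITE_}) {
        if (requested <= current) {
            std::puts("already resident with sufficient access, early exit");
            continue;
        }
        current = requested; // promote: drop old residency, re-make resident with wider access
        std::puts("promote residency");
    }
}
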
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 17ee90316..f4d8871a9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -5,79 +5,167 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <memory> | 8 | #include <span> |
| 9 | 9 | ||
| 10 | #include "common/alignment.h" | ||
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/dynamic_library.h" | ||
| 11 | #include "video_core/buffer_cache/buffer_cache.h" | 13 | #include "video_core/buffer_cache/buffer_cache.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 14 | #include "video_core/rasterizer_interface.h" |
| 15 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 17 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 18 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 19 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 15 | 20 | ||
| 16 | namespace Core { | 21 | namespace Vulkan { |
| 17 | class System; | 22 | class Device; |
| 18 | } | 23 | class MemoryAllocator; |
| 24 | } // namespace Vulkan | ||
| 19 | 25 | ||
| 20 | namespace OpenGL { | 26 | namespace OpenGL { |
| 21 | 27 | ||
| 22 | class Device; | 28 | class BufferCacheRuntime; |
| 23 | class OGLStreamBuffer; | ||
| 24 | class RasterizerOpenGL; | ||
| 25 | class StateTracker; | ||
| 26 | 29 | ||
| 27 | class Buffer : public VideoCommon::BufferBlock { | 30 | class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { |
| 28 | public: | 31 | public: |
| 29 | explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_); | 32 | explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, |
| 30 | ~Buffer(); | 33 | u64 size_bytes); |
| 34 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); | ||
| 31 | 35 | ||
| 32 | void Upload(std::size_t offset, std::size_t data_size, const u8* data); | 36 | void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept; |
| 33 | 37 | ||
| 34 | void Download(std::size_t offset, std::size_t data_size, u8* data); | 38 | void ImmediateDownload(size_t offset, std::span<u8> data) noexcept; |
| 35 | 39 | ||
| 36 | void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | 40 | void MakeResident(GLenum access) noexcept; |
| 37 | std::size_t copy_size); | ||
| 38 | 41 | ||
| 39 | GLuint Handle() const noexcept { | 42 | [[nodiscard]] GLuint SubBuffer(u32 offset); |
| 40 | return gl_buffer.handle; | 43 | |
| 44 | [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { | ||
| 45 | return address; | ||
| 41 | } | 46 | } |
| 42 | 47 | ||
| 43 | u64 Address() const noexcept { | 48 | [[nodiscard]] GLuint Handle() const noexcept { |
| 44 | return gpu_address; | 49 | return buffer.handle; |
| 45 | } | 50 | } |
| 46 | 51 | ||
| 47 | private: | 52 | private: |
| 48 | OGLBuffer gl_buffer; | 53 | void CreateMemoryObjects(BufferCacheRuntime& runtime); |
| 49 | OGLBuffer read_buffer; | 54 | |
| 50 | u64 gpu_address = 0; | 55 | GLuint64EXT address = 0; |
| 56 | Vulkan::MemoryCommit memory_commit; | ||
| 57 | OGLBuffer buffer; | ||
| 58 | GLenum current_residency_access = GL_NONE; | ||
| 59 | std::vector<std::pair<OGLBuffer, u32>> subs; | ||
| 51 | }; | 60 | }; |
| 52 | 61 | ||
| 53 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | 62 | class BufferCacheRuntime { |
| 54 | class OGLBufferCache final : public GenericBufferCache { | 63 | friend Buffer; |
| 64 | |||
| 55 | public: | 65 | public: |
| 56 | explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, | 66 | static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); |
| 57 | Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, | 67 | |
| 58 | const Device& device, OGLStreamBuffer& stream_buffer, | 68 | explicit BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_, |
| 59 | StateTracker& state_tracker); | 69 | Vulkan::MemoryAllocator* vulkan_memory_allocator_); |
| 60 | ~OGLBufferCache(); | 70 | |
| 71 | void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | ||
| 72 | std::span<const VideoCommon::BufferCopy> copies); | ||
| 73 | |||
| 74 | void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); | ||
| 75 | |||
| 76 | void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride); | ||
| 77 | |||
| 78 | void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size); | ||
| 79 | |||
| 80 | void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size); | ||
| 81 | |||
| 82 | void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, | ||
| 83 | bool is_written); | ||
| 84 | |||
| 85 | void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, | ||
| 86 | bool is_written); | ||
| 61 | 87 | ||
| 62 | BufferInfo GetEmptyBuffer(std::size_t) override; | 88 | void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); |
| 63 | 89 | ||
| 64 | void Acquire() noexcept { | 90 | void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { |
| 65 | cbuf_cursor = 0; | 91 | if (use_assembly_shaders) { |
| 92 | const GLuint handle = fast_uniforms[stage][binding_index].handle; | ||
| 93 | const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); | ||
| 94 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); | ||
| 95 | } else { | ||
| 96 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||
| 97 | const GLuint binding = base_binding + binding_index; | ||
| 98 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, | ||
| 99 | fast_uniforms[stage][binding_index].handle, 0, | ||
| 100 | static_cast<GLsizeiptr>(size)); | ||
| 101 | } | ||
| 66 | } | 102 | } |
| 67 | 103 | ||
| 68 | protected: | 104 | void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) { |
| 69 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | 105 | if (use_assembly_shaders) { |
| 106 | glProgramBufferParametersIuivNV( | ||
| 107 | PABO_LUT[stage], binding_index, 0, | ||
| 108 | static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)), | ||
| 109 | reinterpret_cast<const GLuint*>(data.data())); | ||
| 110 | } else { | ||
| 111 | glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0, | ||
| 112 | static_cast<GLsizeiptr>(data.size_bytes()), data.data()); | ||
| 113 | } | ||
| 114 | } | ||
| 115 | |||
| 116 | std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { | ||
| 117 | const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); | ||
| 118 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||
| 119 | const GLuint binding = base_binding + binding_index; | ||
| 120 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), | ||
| 121 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 122 | return mapped_span; | ||
| 123 | } | ||
| 70 | 124 | ||
| 71 | BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; | 125 | [[nodiscard]] const GLvoid* IndexOffset() const noexcept { |
| 126 | return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset)); | ||
| 127 | } | ||
| 128 | |||
| 129 | [[nodiscard]] bool HasFastBufferSubData() const noexcept { | ||
| 130 | return device.HasFastBufferSubData(); | ||
| 131 | } | ||
| 72 | 132 | ||
| 73 | private: | 133 | private: |
| 74 | static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | 134 | static constexpr std::array PABO_LUT{ |
| 75 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | 135 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, |
| 136 | GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 137 | GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 138 | }; | ||
| 76 | 139 | ||
| 77 | const Device& device; | 140 | const Device& device; |
| 141 | const Vulkan::Device* vulkan_device; | ||
| 142 | Vulkan::MemoryAllocator* vulkan_memory_allocator; | ||
| 143 | std::optional<StreamBuffer> stream_buffer; | ||
| 144 | |||
| 145 | u32 max_attributes = 0; | ||
| 78 | 146 | ||
| 79 | std::size_t cbuf_cursor = 0; | 147 | bool use_assembly_shaders = false; |
| 80 | std::array<GLuint, NUM_CBUFS> cbufs{}; | 148 | bool has_unified_vertex_buffers = false; |
| 149 | |||
| 150 | std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | ||
| 151 | VideoCommon::NUM_STAGES> | ||
| 152 | fast_uniforms; | ||
| 153 | |||
| 154 | u32 index_buffer_offset = 0; | ||
| 155 | }; | ||
| 156 | |||
| 157 | struct BufferCacheParams { | ||
| 158 | using Runtime = OpenGL::BufferCacheRuntime; | ||
| 159 | using Buffer = OpenGL::Buffer; | ||
| 160 | |||
| 161 | static constexpr bool IS_OPENGL = true; | ||
| 162 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; | ||
| 163 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; | ||
| 164 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; | ||
| 165 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | ||
| 166 | static constexpr bool USE_MEMORY_MAPS = false; | ||
| 81 | }; | 167 | }; |
| 82 | 168 | ||
| 169 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | ||
| 170 | |||
| 83 | } // namespace OpenGL | 171 | } // namespace OpenGL |
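
Editor's note (not part of the commit): the new BufferCacheParams struct is a traits bundle that a single shared template, VideoCommon::BufferCache&lt;P&gt;, is instantiated from; the OpenGL and Vulkan backends each provide their own Buffer/Runtime types and feature flags. The sketch below is a minimal, self-contained illustration of that pattern under assumed names (FakeGLBuffer, FakeGLRuntime, GenericBufferCache are stand-ins, not the real video_core types).

#include <cstdio>

struct FakeGLBuffer { void Upload() { std::puts("GL immediate upload"); } };
struct FakeGLRuntime {};

template <class P>
class GenericBufferCache {
public:
    void Upload() {
        if constexpr (P::USE_MEMORY_MAPS) {
            std::puts("upload through persistently mapped staging memory");
        } else {
            typename P::Buffer{}.Upload(); // immediate glNamedBufferSubData-style path
        }
    }
};

struct GLParams {
    using Runtime = FakeGLRuntime;
    using Buffer = FakeGLBuffer;
    static constexpr bool IS_OPENGL = true;
    static constexpr bool USE_MEMORY_MAPS = false; // matches the OpenGL traits above
};

int main() {
    GenericBufferCache<GLParams> cache;
    cache.Upload();
}
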
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 04c267ee4..0f492f006 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -21,9 +21,7 @@ | |||
| 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 22 | 22 | ||
| 23 | namespace OpenGL { | 23 | namespace OpenGL { |
| 24 | |||
| 25 | namespace { | 24 | namespace { |
| 26 | |||
| 27 | // One uniform block is reserved for emulation purposes | 25 | // One uniform block is reserved for emulation purposes |
| 28 | constexpr u32 ReservedUniformBlocks = 1; | 26 | constexpr u32 ReservedUniformBlocks = 1; |
| 29 | 27 | ||
| @@ -197,11 +195,13 @@ bool IsASTCSupported() { | |||
| 197 | const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | 195 | const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); |
| 198 | return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); | 196 | return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); |
| 199 | } | 197 | } |
| 200 | |||
| 201 | } // Anonymous namespace | 198 | } // Anonymous namespace |
| 202 | 199 | ||
| 203 | Device::Device() | 200 | Device::Device(bool has_vulkan_instance) { |
| 204 | : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { | 201 | if (!GLAD_GL_VERSION_4_6) { |
| 202 | LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available"); | ||
| 203 | throw std::runtime_error{"Insufficient version"}; | ||
| 204 | } | ||
| 205 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); | 205 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); |
| 206 | const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); | 206 | const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); |
| 207 | const std::vector extensions = GetExtensions(); | 207 | const std::vector extensions = GetExtensions(); |
| @@ -217,6 +217,9 @@ Device::Device() | |||
| 217 | "Beta driver 443.24 is known to have issues. There might be performance issues."); | 217 | "Beta driver 443.24 is known to have issues. There might be performance issues."); |
| 218 | disable_fast_buffer_sub_data = true; | 218 | disable_fast_buffer_sub_data = true; |
| 219 | } | 219 | } |
| 220 | |||
| 221 | max_uniform_buffers = BuildMaxUniformBuffers(); | ||
| 222 | base_bindings = BuildBaseBindings(); | ||
| 220 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 223 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 221 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 224 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 222 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 225 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| @@ -243,7 +246,8 @@ Device::Device() | |||
| 243 | 246 | ||
| 244 | use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && | 247 | use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && |
| 245 | GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && | 248 | GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && |
| 246 | GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; | 249 | GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2 && |
| 250 | has_vulkan_instance; | ||
| 247 | 251 | ||
| 248 | use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); | 252 | use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); |
| 249 | use_driver_cache = is_nvidia; | 253 | use_driver_cache = is_nvidia; |
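
Editor's note (not part of the commit): Device::Device(bool has_vulkan_instance) now throws when OpenGL 4.6 is missing instead of continuing with a broken context, and assembly shaders are additionally gated on a Vulkan instance being available. The standalone sketch below only mirrors that failure shape; FakeDevice and its flags are illustrative stand-ins, not the real OpenGL::Device.

#include <cstdio>
#include <stdexcept>

struct FakeDevice {
    explicit FakeDevice(bool has_gl46, bool has_vulkan_instance)
        : use_assembly_shaders{has_vulkan_instance /* && NV extensions && user setting */} {
        if (!has_gl46) {
            throw std::runtime_error{"Insufficient version"};
        }
    }
    bool use_assembly_shaders;
};

int main() {
    try {
        FakeDevice device{/*has_gl46=*/false, /*has_vulkan_instance=*/true};
        std::printf("assembly shaders enabled: %d\n", device.use_assembly_shaders);
    } catch (const std::runtime_error& e) {
        // A caller (e.g. the renderer init path) can now fail or fall back cleanly.
        std::printf("device creation failed: %s\n", e.what());
    }
}
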
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 9141de635..eb62ae52d 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -10,18 +10,16 @@ | |||
| 10 | 10 | ||
| 11 | namespace OpenGL { | 11 | namespace OpenGL { |
| 12 | 12 | ||
| 13 | static constexpr u32 EmulationUniformBlockBinding = 0; | 13 | class Device { |
| 14 | |||
| 15 | class Device final { | ||
| 16 | public: | 14 | public: |
| 17 | struct BaseBindings final { | 15 | struct BaseBindings { |
| 18 | u32 uniform_buffer{}; | 16 | u32 uniform_buffer{}; |
| 19 | u32 shader_storage_buffer{}; | 17 | u32 shader_storage_buffer{}; |
| 20 | u32 sampler{}; | 18 | u32 sampler{}; |
| 21 | u32 image{}; | 19 | u32 image{}; |
| 22 | }; | 20 | }; |
| 23 | 21 | ||
| 24 | explicit Device(); | 22 | explicit Device(bool has_vulkan_instance); |
| 25 | explicit Device(std::nullptr_t); | 23 | explicit Device(std::nullptr_t); |
| 26 | 24 | ||
| 27 | u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { | 25 | u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { |
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 3e9c922f5..151290101 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp | |||
| @@ -47,7 +47,7 @@ void GLInnerFence::Wait() { | |||
| 47 | 47 | ||
| 48 | FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, | 48 | FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, |
| 49 | Tegra::GPU& gpu_, TextureCache& texture_cache_, | 49 | Tegra::GPU& gpu_, TextureCache& texture_cache_, |
| 50 | OGLBufferCache& buffer_cache_, QueryCache& query_cache_) | 50 | BufferCache& buffer_cache_, QueryCache& query_cache_) |
| 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} | 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} |
| 52 | 52 | ||
| 53 | Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { | 53 | Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { |
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index 30dbee613..e714aa115 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h | |||
| @@ -32,14 +32,13 @@ private: | |||
| 32 | }; | 32 | }; |
| 33 | 33 | ||
| 34 | using Fence = std::shared_ptr<GLInnerFence>; | 34 | using Fence = std::shared_ptr<GLInnerFence>; |
| 35 | using GenericFenceManager = | 35 | using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; |
| 36 | VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>; | ||
| 37 | 36 | ||
| 38 | class FenceManagerOpenGL final : public GenericFenceManager { | 37 | class FenceManagerOpenGL final : public GenericFenceManager { |
| 39 | public: | 38 | public: |
| 40 | explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 39 | explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, |
| 41 | TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, | 40 | TextureCache& texture_cache, BufferCache& buffer_cache, |
| 42 | QueryCache& query_cache_); | 41 | QueryCache& query_cache); |
| 43 | 42 | ||
| 44 | protected: | 43 | protected: |
| 45 | Fence CreateFence(u32 value, bool is_stubbed) override; | 44 | Fence CreateFence(u32 value, bool is_stubbed) override; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ea4ca9a82..52499ee4c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -44,17 +44,10 @@ using VideoCore::Surface::PixelFormat; | |||
| 44 | using VideoCore::Surface::SurfaceTarget; | 44 | using VideoCore::Surface::SurfaceTarget; |
| 45 | using VideoCore::Surface::SurfaceType; | 45 | using VideoCore::Surface::SurfaceType; |
| 46 | 46 | ||
| 47 | MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192)); | ||
| 48 | MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192)); | ||
| 49 | MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192)); | ||
| 50 | MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192)); | ||
| 51 | MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192)); | ||
| 52 | MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192)); | ||
| 53 | MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192)); | ||
| 54 | MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | 47 | MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); |
| 48 | MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192)); | ||
| 55 | MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); | 49 | MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); |
| 56 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | 50 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); |
| 57 | MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); | ||
| 58 | 51 | ||
| 59 | namespace { | 52 | namespace { |
| 60 | 53 | ||
| @@ -101,20 +94,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const | |||
| 101 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | 94 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); |
| 102 | } | 95 | } |
| 103 | 96 | ||
| 104 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 105 | const ConstBufferEntry& entry) { | ||
| 106 | if (!entry.IsIndirect()) { | ||
| 107 | return entry.GetSize(); | ||
| 108 | } | ||
| 109 | if (buffer.size > Maxwell::MaxConstBufferSize) { | ||
| 110 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, | ||
| 111 | Maxwell::MaxConstBufferSize); | ||
| 112 | return Maxwell::MaxConstBufferSize; | ||
| 113 | } | ||
| 114 | |||
| 115 | return buffer.size; | ||
| 116 | } | ||
| 117 | |||
| 118 | /// Translates hardware transform feedback indices | 97 | /// Translates hardware transform feedback indices |
| 119 | /// @param location Hardware location | 98 | /// @param location Hardware location |
| 120 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments | 99 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments |
| @@ -147,14 +126,6 @@ void oglEnable(GLenum cap, bool state) { | |||
| 147 | (state ? glEnable : glDisable)(cap); | 126 | (state ? glEnable : glDisable)(cap); |
| 148 | } | 127 | } |
| 149 | 128 | ||
| 150 | void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) { | ||
| 151 | if (num_ssbos == 0) { | ||
| 152 | return; | ||
| 153 | } | ||
| 154 | glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos), | ||
| 155 | reinterpret_cast<const GLuint*>(ssbos)); | ||
| 156 | } | ||
| 157 | |||
| 158 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | 129 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { |
| 159 | if (entry.is_buffer) { | 130 | if (entry.is_buffer) { |
| 160 | return ImageViewType::Buffer; | 131 | return ImageViewType::Buffer; |
| @@ -196,49 +167,35 @@ ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | |||
| 196 | 167 | ||
| 197 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 168 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 198 | Core::Memory::Memory& cpu_memory_, const Device& device_, | 169 | Core::Memory::Memory& cpu_memory_, const Device& device_, |
| 170 | const Vulkan::Device* vulkan_device, | ||
| 171 | Vulkan::MemoryAllocator* vulkan_memory_allocator, | ||
| 199 | ScreenInfo& screen_info_, ProgramManager& program_manager_, | 172 | ScreenInfo& screen_info_, ProgramManager& program_manager_, |
| 200 | StateTracker& state_tracker_) | 173 | StateTracker& state_tracker_) |
| 201 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), | 174 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), |
| 202 | kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), | 175 | kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), |
| 203 | screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), | 176 | screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), |
| 204 | stream_buffer(device, state_tracker), | ||
| 205 | texture_cache_runtime(device, program_manager, state_tracker), | 177 | texture_cache_runtime(device, program_manager, state_tracker), |
| 206 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 178 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |
| 179 | buffer_cache_runtime(device, vulkan_device, vulkan_memory_allocator), | ||
| 180 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | ||
| 207 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | 181 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), |
| 208 | query_cache(*this, maxwell3d, gpu_memory), | 182 | query_cache(*this, maxwell3d, gpu_memory), |
| 209 | buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker), | ||
| 210 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | 183 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), |
| 211 | async_shaders(emu_window_) { | 184 | async_shaders(emu_window_) { |
| 212 | unified_uniform_buffer.Create(); | ||
| 213 | glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); | ||
| 214 | |||
| 215 | if (device.UseAssemblyShaders()) { | ||
| 216 | glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||
| 217 | for (const GLuint cbuf : staging_cbufs) { | ||
| 218 | glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize), | ||
| 219 | nullptr, 0); | ||
| 220 | } | ||
| 221 | } | ||
| 222 | if (device.UseAsynchronousShaders()) { | 185 | if (device.UseAsynchronousShaders()) { |
| 223 | async_shaders.AllocateWorkers(); | 186 | async_shaders.AllocateWorkers(); |
| 224 | } | 187 | } |
| 225 | } | 188 | } |
| 226 | 189 | ||
| 227 | RasterizerOpenGL::~RasterizerOpenGL() { | 190 | RasterizerOpenGL::~RasterizerOpenGL() = default; |
| 228 | if (device.UseAssemblyShaders()) { | ||
| 229 | glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | 191 | ||
| 233 | void RasterizerOpenGL::SetupVertexFormat() { | 192 | void RasterizerOpenGL::SyncVertexFormats() { |
| 234 | auto& flags = maxwell3d.dirty.flags; | 193 | auto& flags = maxwell3d.dirty.flags; |
| 235 | if (!flags[Dirty::VertexFormats]) { | 194 | if (!flags[Dirty::VertexFormats]) { |
| 236 | return; | 195 | return; |
| 237 | } | 196 | } |
| 238 | flags[Dirty::VertexFormats] = false; | 197 | flags[Dirty::VertexFormats] = false; |
| 239 | 198 | ||
| 240 | MICROPROFILE_SCOPE(OpenGL_VAO); | ||
| 241 | |||
| 242 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables | 199 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables |
| 243 | // the first 16 vertex attributes always, as we don't know which ones are actually used until | 200 | // the first 16 vertex attributes always, as we don't know which ones are actually used until |
| 244 | // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to | 201 | // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to |
| @@ -274,55 +231,7 @@ void RasterizerOpenGL::SetupVertexFormat() { | |||
| 274 | } | 231 | } |
| 275 | } | 232 | } |
| 276 | 233 | ||
| 277 | void RasterizerOpenGL::SetupVertexBuffer() { | 234 | void RasterizerOpenGL::SyncVertexInstances() { |
| 278 | auto& flags = maxwell3d.dirty.flags; | ||
| 279 | if (!flags[Dirty::VertexBuffers]) { | ||
| 280 | return; | ||
| 281 | } | ||
| 282 | flags[Dirty::VertexBuffers] = false; | ||
| 283 | |||
| 284 | MICROPROFILE_SCOPE(OpenGL_VB); | ||
| 285 | |||
| 286 | const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); | ||
| 287 | |||
| 288 | // Upload all guest vertex arrays sequentially to our buffer | ||
| 289 | const auto& regs = maxwell3d.regs; | ||
| 290 | for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { | ||
| 291 | if (!flags[Dirty::VertexBuffer0 + index]) { | ||
| 292 | continue; | ||
| 293 | } | ||
| 294 | flags[Dirty::VertexBuffer0 + index] = false; | ||
| 295 | |||
| 296 | const auto& vertex_array = regs.vertex_array[index]; | ||
| 297 | if (!vertex_array.IsEnabled()) { | ||
| 298 | continue; | ||
| 299 | } | ||
| 300 | |||
| 301 | const GPUVAddr start = vertex_array.StartAddress(); | ||
| 302 | const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||
| 303 | ASSERT(end >= start); | ||
| 304 | |||
| 305 | const GLuint gl_index = static_cast<GLuint>(index); | ||
| 306 | const u64 size = end - start; | ||
| 307 | if (size == 0) { | ||
| 308 | glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); | ||
| 309 | if (use_unified_memory) { | ||
| 310 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0); | ||
| 311 | } | ||
| 312 | continue; | ||
| 313 | } | ||
| 314 | const auto info = buffer_cache.UploadMemory(start, size); | ||
| 315 | if (use_unified_memory) { | ||
| 316 | glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); | ||
| 317 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, | ||
| 318 | info.address + info.offset, size); | ||
| 319 | } else { | ||
| 320 | glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride); | ||
| 321 | } | ||
| 322 | } | ||
| 323 | } | ||
| 324 | |||
| 325 | void RasterizerOpenGL::SetupVertexInstances() { | ||
| 326 | auto& flags = maxwell3d.dirty.flags; | 235 | auto& flags = maxwell3d.dirty.flags; |
| 327 | if (!flags[Dirty::VertexInstances]) { | 236 | if (!flags[Dirty::VertexInstances]) { |
| 328 | return; | 237 | return; |
| @@ -343,17 +252,7 @@ void RasterizerOpenGL::SetupVertexInstances() { | |||
| 343 | } | 252 | } |
| 344 | } | 253 | } |
| 345 | 254 | ||
| 346 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { | 255 | void RasterizerOpenGL::SetupShaders(bool is_indexed) { |
| 347 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 348 | const auto& regs = maxwell3d.regs; | ||
| 349 | const std::size_t size = CalculateIndexBufferSize(); | ||
| 350 | const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); | ||
| 351 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); | ||
| 352 | return info.offset; | ||
| 353 | } | ||
| 354 | |||
| 355 | void RasterizerOpenGL::SetupShaders() { | ||
| 356 | MICROPROFILE_SCOPE(OpenGL_Shader); | ||
| 357 | u32 clip_distances = 0; | 256 | u32 clip_distances = 0; |
| 358 | 257 | ||
| 359 | std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; | 258 | std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; |
| @@ -410,11 +309,19 @@ void RasterizerOpenGL::SetupShaders() { | |||
| 410 | const size_t stage = index == 0 ? 0 : index - 1; | 309 | const size_t stage = index == 0 ? 0 : index - 1; |
| 411 | shaders[stage] = shader; | 310 | shaders[stage] = shader; |
| 412 | 311 | ||
| 413 | SetupDrawConstBuffers(stage, shader); | ||
| 414 | SetupDrawGlobalMemory(stage, shader); | ||
| 415 | SetupDrawTextures(shader, stage); | 312 | SetupDrawTextures(shader, stage); |
| 416 | SetupDrawImages(shader, stage); | 313 | SetupDrawImages(shader, stage); |
| 417 | 314 | ||
| 315 | buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); | ||
| 316 | |||
| 317 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 318 | u32 ssbo_index = 0; | ||
| 319 | for (const auto& buffer : shader->GetEntries().global_memory_entries) { | ||
| 320 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 321 | buffer.cbuf_offset, buffer.is_written); | ||
| 322 | ++ssbo_index; | ||
| 323 | } | ||
| 324 | |||
| 418 | // Workaround for Intel drivers. | 325 | // Workaround for Intel drivers. |
| 419 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 326 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
| 420 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the | 327 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the |
| @@ -430,43 +337,26 @@ void RasterizerOpenGL::SetupShaders() { | |||
| 430 | SyncClipEnabled(clip_distances); | 337 | SyncClipEnabled(clip_distances); |
| 431 | maxwell3d.dirty.flags[Dirty::Shaders] = false; | 338 | maxwell3d.dirty.flags[Dirty::Shaders] = false; |
| 432 | 339 | ||
| 340 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 341 | |||
| 433 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | 342 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 434 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | 343 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); |
| 435 | 344 | ||
| 345 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 346 | |||
| 436 | size_t image_view_index = 0; | 347 | size_t image_view_index = 0; |
| 437 | size_t texture_index = 0; | 348 | size_t texture_index = 0; |
| 438 | size_t image_index = 0; | 349 | size_t image_index = 0; |
| 439 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | 350 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| 440 | const Shader* const shader = shaders[stage]; | 351 | const Shader* const shader = shaders[stage]; |
| 441 | if (shader) { | 352 | if (!shader) { |
| 442 | const auto base = device.GetBaseBindings(stage); | ||
| 443 | BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, | ||
| 444 | texture_index, image_index); | ||
| 445 | } | ||
| 446 | } | ||
| 447 | } | ||
| 448 | |||
| 449 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | ||
| 450 | const auto& regs = maxwell3d.regs; | ||
| 451 | |||
| 452 | std::size_t size = 0; | ||
| 453 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 454 | if (!regs.vertex_array[index].IsEnabled()) | ||
| 455 | continue; | 353 | continue; |
| 456 | 354 | } | |
| 457 | const GPUVAddr start = regs.vertex_array[index].StartAddress(); | 355 | buffer_cache.BindHostStageBuffers(stage); |
| 458 | const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | 356 | const auto& base = device.GetBaseBindings(stage); |
| 459 | 357 | BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, | |
| 460 | size += end - start; | 358 | texture_index, image_index); |
| 461 | ASSERT(end >= start); | ||
| 462 | } | 359 | } |
| 463 | |||
| 464 | return size; | ||
| 465 | } | ||
| 466 | |||
| 467 | std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { | ||
| 468 | return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * | ||
| 469 | static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); | ||
| 470 | } | 360 | } |
| 471 | 361 | ||
| 472 | void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, | 362 | void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, |
| @@ -475,6 +365,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s | |||
| 475 | } | 365 | } |
| 476 | 366 | ||
| 477 | void RasterizerOpenGL::Clear() { | 367 | void RasterizerOpenGL::Clear() { |
| 368 | MICROPROFILE_SCOPE(OpenGL_Clears); | ||
| 478 | if (!maxwell3d.ShouldExecute()) { | 369 | if (!maxwell3d.ShouldExecute()) { |
| 479 | return; | 370 | return; |
| 480 | } | 371 | } |
| @@ -525,11 +416,9 @@ void RasterizerOpenGL::Clear() { | |||
| 525 | } | 416 | } |
| 526 | UNIMPLEMENTED_IF(regs.clear_flags.viewport); | 417 | UNIMPLEMENTED_IF(regs.clear_flags.viewport); |
| 527 | 418 | ||
| 528 | { | 419 | std::scoped_lock lock{texture_cache.mutex}; |
| 529 | auto lock = texture_cache.AcquireLock(); | 420 | texture_cache.UpdateRenderTargets(true); |
| 530 | texture_cache.UpdateRenderTargets(true); | 421 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); |
| 531 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||
| 532 | } | ||
| 533 | 422 | ||
| 534 | if (use_color) { | 423 | if (use_color) { |
| 535 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | 424 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); |
| @@ -541,7 +430,6 @@ void RasterizerOpenGL::Clear() { | |||
| 541 | } else if (use_stencil) { | 430 | } else if (use_stencil) { |
| 542 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); | 431 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); |
| 543 | } | 432 | } |
| 544 | |||
| 545 | ++num_queued_commands; | 433 | ++num_queued_commands; |
| 546 | } | 434 | } |
| 547 | 435 | ||
| @@ -550,75 +438,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 550 | 438 | ||
| 551 | query_cache.UpdateCounters(); | 439 | query_cache.UpdateCounters(); |
| 552 | 440 | ||
| 553 | SyncViewport(); | 441 | SyncState(); |
| 554 | SyncRasterizeEnable(); | ||
| 555 | SyncPolygonModes(); | ||
| 556 | SyncColorMask(); | ||
| 557 | SyncFragmentColorClampState(); | ||
| 558 | SyncMultiSampleState(); | ||
| 559 | SyncDepthTestState(); | ||
| 560 | SyncDepthClamp(); | ||
| 561 | SyncStencilTestState(); | ||
| 562 | SyncBlendState(); | ||
| 563 | SyncLogicOpState(); | ||
| 564 | SyncCullMode(); | ||
| 565 | SyncPrimitiveRestart(); | ||
| 566 | SyncScissorTest(); | ||
| 567 | SyncPointState(); | ||
| 568 | SyncLineState(); | ||
| 569 | SyncPolygonOffset(); | ||
| 570 | SyncAlphaTest(); | ||
| 571 | SyncFramebufferSRGB(); | ||
| 572 | |||
| 573 | buffer_cache.Acquire(); | ||
| 574 | current_cbuf = 0; | ||
| 575 | |||
| 576 | std::size_t buffer_size = CalculateVertexArraysSize(); | ||
| 577 | |||
| 578 | // Add space for index buffer | ||
| 579 | if (is_indexed) { | ||
| 580 | buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); | ||
| 581 | } | ||
| 582 | |||
| 583 | // Uniform space for the 5 shader stages | ||
| 584 | buffer_size = | ||
| 585 | Common::AlignUp<std::size_t>(buffer_size, 4) + | ||
| 586 | (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage; | ||
| 587 | |||
| 588 | // Add space for at least 18 constant buffers | ||
| 589 | buffer_size += Maxwell::MaxConstBuffers * | ||
| 590 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 591 | |||
| 592 | // Prepare the vertex array. | ||
| 593 | buffer_cache.Map(buffer_size); | ||
| 594 | |||
| 595 | // Prepare vertex array format. | ||
| 596 | SetupVertexFormat(); | ||
| 597 | |||
| 598 | // Upload vertex and index data. | ||
| 599 | SetupVertexBuffer(); | ||
| 600 | SetupVertexInstances(); | ||
| 601 | GLintptr index_buffer_offset = 0; | ||
| 602 | if (is_indexed) { | ||
| 603 | index_buffer_offset = SetupIndexBuffer(); | ||
| 604 | } | ||
| 605 | |||
| 606 | // Setup emulation uniform buffer. | ||
| 607 | if (!device.UseAssemblyShaders()) { | ||
| 608 | MaxwellUniformData ubo; | ||
| 609 | ubo.SetFromRegs(maxwell3d); | ||
| 610 | const auto info = | ||
| 611 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | ||
| 612 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, | ||
| 613 | static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 614 | } | ||
| 615 | 442 | ||
| 616 | // Setup shaders and their used resources. | 443 | // Setup shaders and their used resources. |
| 617 | auto lock = texture_cache.AcquireLock(); | 444 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 618 | SetupShaders(); | 445 | SetupShaders(is_indexed); |
| 619 | 446 | ||
| 620 | // Signal the buffer cache that we are not going to upload more things. | ||
| 621 | buffer_cache.Unmap(); | ||
| 622 | texture_cache.UpdateRenderTargets(false); | 447 | texture_cache.UpdateRenderTargets(false); |
| 623 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | 448 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); |
| 624 | program_manager.BindGraphicsPipeline(); | 449 | program_manager.BindGraphicsPipeline(); |
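
Editor's note (not part of the commit): the hunk above replaces the per-cache AcquireLock() helpers with public mutex members that Draw() locks together through std::scoped_lock, which acquires multiple mutexes without risking deadlock regardless of the order other call sites name them in. A minimal standalone sketch of that pattern, with FakeTextureCache/FakeBufferCache as stand-ins:

#include <cstdio>
#include <mutex>

struct FakeTextureCache { std::mutex mutex; };
struct FakeBufferCache { std::mutex mutex; };

void Draw(FakeBufferCache& buffer_cache, FakeTextureCache& texture_cache) {
    // std::scoped_lock uses the std::lock deadlock-avoidance algorithm for both mutexes.
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    std::puts("both caches held for the duration of the draw");
} // released here automatically

int main() {
    FakeBufferCache buffer_cache;
    FakeTextureCache texture_cache;
    Draw(buffer_cache, texture_cache);
}
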
| @@ -632,7 +457,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 632 | if (is_indexed) { | 457 | if (is_indexed) { |
| 633 | const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base); | 458 | const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base); |
| 634 | const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count); | 459 | const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count); |
| 635 | const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); | 460 | const GLvoid* const offset = buffer_cache_runtime.IndexOffset(); |
| 636 | const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format); | 461 | const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format); |
| 637 | if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { | 462 | if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { |
| 638 | glDrawElements(primitive_mode, num_vertices, format, offset); | 463 | glDrawElements(primitive_mode, num_vertices, format, offset); |
| @@ -672,22 +497,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 672 | } | 497 | } |
| 673 | 498 | ||
| 674 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 499 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| 675 | buffer_cache.Acquire(); | ||
| 676 | current_cbuf = 0; | ||
| 677 | |||
| 678 | Shader* const kernel = shader_cache.GetComputeKernel(code_addr); | 500 | Shader* const kernel = shader_cache.GetComputeKernel(code_addr); |
| 679 | 501 | ||
| 680 | auto lock = texture_cache.AcquireLock(); | 502 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 681 | BindComputeTextures(kernel); | 503 | BindComputeTextures(kernel); |
| 682 | 504 | ||
| 683 | const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * | 505 | const auto& entries = kernel->GetEntries(); |
| 684 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | 506 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); |
| 685 | buffer_cache.Map(buffer_size); | 507 | buffer_cache.UnbindComputeStorageBuffers(); |
| 686 | 508 | u32 ssbo_index = 0; | |
| 687 | SetupComputeConstBuffers(kernel); | 509 | for (const auto& buffer : entries.global_memory_entries) { |
| 688 | SetupComputeGlobalMemory(kernel); | 510 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, |
| 689 | 511 | buffer.is_written); | |
| 690 | buffer_cache.Unmap(); | 512 | ++ssbo_index; |
| 513 | } | ||
| 514 | buffer_cache.UpdateComputeBuffers(); | ||
| 515 | buffer_cache.BindHostComputeBuffers(); | ||
| 691 | 516 | ||
| 692 | const auto& launch_desc = kepler_compute.launch_description; | 517 | const auto& launch_desc = kepler_compute.launch_description; |
| 693 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 518 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
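
Editor's note (not part of the commit): DispatchCompute() now registers each of the kernel's global memory entries with the buffer cache under an incrementing SSBO slot, and the cache resolves the (cbuf_index, cbuf_offset) pair to an actual buffer before binding. The sketch below is a simplified, self-contained illustration of that enumeration only; GlobalMemoryEntry and FakeBufferCache are assumed stand-ins for the shader IR and buffer cache types, and the descriptor-in-const-buffer interpretation is my reading of the code, not something stated in the diff.

#include <cstdio>
#include <vector>

struct GlobalMemoryEntry {
    unsigned cbuf_index;  // const buffer that holds the storage buffer descriptor
    unsigned cbuf_offset; // offset of the descriptor inside that const buffer
    bool is_written;      // written buffers need read-write residency
};

struct FakeBufferCache {
    void UnbindComputeStorageBuffers() { num_bound = 0; }
    void BindComputeStorageBuffer(unsigned slot, unsigned cbuf, unsigned offset, bool written) {
        ++num_bound;
        std::printf("slot %u <- cbuf %u + 0x%x (%s)\n", slot, cbuf, offset,
                    written ? "read-write" : "read-only");
    }
    unsigned num_bound = 0;
};

int main() {
    const std::vector<GlobalMemoryEntry> entries{{0, 0x110, false}, {0, 0x120, true}};
    FakeBufferCache buffer_cache;
    buffer_cache.UnbindComputeStorageBuffers();
    unsigned ssbo_index = 0;
    for (const auto& entry : entries) {
        buffer_cache.BindComputeStorageBuffer(ssbo_index, entry.cbuf_index, entry.cbuf_offset,
                                              entry.is_written);
        ++ssbo_index;
    }
}
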
| @@ -703,6 +528,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
| 703 | query_cache.Query(gpu_addr, type, timestamp); | 528 | query_cache.Query(gpu_addr, type, timestamp); |
| 704 | } | 529 | } |
| 705 | 530 | ||
| 531 | void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||
| 532 | u32 size) { | ||
| 533 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 534 | buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size); | ||
| 535 | } | ||
| 536 | |||
| 706 | void RasterizerOpenGL::FlushAll() {} | 537 | void RasterizerOpenGL::FlushAll() {} |
| 707 | 538 | ||
| 708 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | 539 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { |
| @@ -711,19 +542,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | |||
| 711 | return; | 542 | return; |
| 712 | } | 543 | } |
| 713 | { | 544 | { |
| 714 | auto lock = texture_cache.AcquireLock(); | 545 | std::scoped_lock lock{texture_cache.mutex}; |
| 715 | texture_cache.DownloadMemory(addr, size); | 546 | texture_cache.DownloadMemory(addr, size); |
| 716 | } | 547 | } |
| 717 | buffer_cache.FlushRegion(addr, size); | 548 | { |
| 549 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 550 | buffer_cache.DownloadMemory(addr, size); | ||
| 551 | } | ||
| 718 | query_cache.FlushRegion(addr, size); | 552 | query_cache.FlushRegion(addr, size); |
| 719 | } | 553 | } |
| 720 | 554 | ||
| 721 | bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { | 555 | bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { |
| 556 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 722 | if (!Settings::IsGPULevelHigh()) { | 557 | if (!Settings::IsGPULevelHigh()) { |
| 723 | return buffer_cache.MustFlushRegion(addr, size); | 558 | return buffer_cache.IsRegionGpuModified(addr, size); |
| 724 | } | 559 | } |
| 725 | return texture_cache.IsRegionGpuModified(addr, size) || | 560 | return texture_cache.IsRegionGpuModified(addr, size) || |
| 726 | buffer_cache.MustFlushRegion(addr, size); | 561 | buffer_cache.IsRegionGpuModified(addr, size); |
| 727 | } | 562 | } |
| 728 | 563 | ||
| 729 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | 564 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| @@ -732,11 +567,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | |||
| 732 | return; | 567 | return; |
| 733 | } | 568 | } |
| 734 | { | 569 | { |
| 735 | auto lock = texture_cache.AcquireLock(); | 570 | std::scoped_lock lock{texture_cache.mutex}; |
| 736 | texture_cache.WriteMemory(addr, size); | 571 | texture_cache.WriteMemory(addr, size); |
| 737 | } | 572 | } |
| 573 | { | ||
| 574 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 575 | buffer_cache.WriteMemory(addr, size); | ||
| 576 | } | ||
| 738 | shader_cache.InvalidateRegion(addr, size); | 577 | shader_cache.InvalidateRegion(addr, size); |
| 739 | buffer_cache.InvalidateRegion(addr, size); | ||
| 740 | query_cache.InvalidateRegion(addr, size); | 578 | query_cache.InvalidateRegion(addr, size); |
| 741 | } | 579 | } |
| 742 | 580 | ||
| @@ -745,26 +583,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 745 | if (addr == 0 || size == 0) { | 583 | if (addr == 0 || size == 0) { |
| 746 | return; | 584 | return; |
| 747 | } | 585 | } |
| 586 | shader_cache.OnCPUWrite(addr, size); | ||
| 748 | { | 587 | { |
| 749 | auto lock = texture_cache.AcquireLock(); | 588 | std::scoped_lock lock{texture_cache.mutex}; |
| 750 | texture_cache.WriteMemory(addr, size); | 589 | texture_cache.WriteMemory(addr, size); |
| 751 | } | 590 | } |
| 752 | shader_cache.OnCPUWrite(addr, size); | 591 | { |
| 753 | buffer_cache.OnCPUWrite(addr, size); | 592 | std::scoped_lock lock{buffer_cache.mutex}; |
| 593 | buffer_cache.CachedWriteMemory(addr, size); | ||
| 594 | } | ||
| 754 | } | 595 | } |
| 755 | 596 | ||
| 756 | void RasterizerOpenGL::SyncGuestHost() { | 597 | void RasterizerOpenGL::SyncGuestHost() { |
| 757 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 598 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 758 | buffer_cache.SyncGuestHost(); | ||
| 759 | shader_cache.SyncGuestHost(); | 599 | shader_cache.SyncGuestHost(); |
| 600 | { | ||
| 601 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 602 | buffer_cache.FlushCachedWrites(); | ||
| 603 | } | ||
| 760 | } | 604 | } |
| 761 | 605 | ||
| 762 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | 606 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { |
| 763 | { | 607 | { |
| 764 | auto lock = texture_cache.AcquireLock(); | 608 | std::scoped_lock lock{texture_cache.mutex}; |
| 765 | texture_cache.UnmapMemory(addr, size); | 609 | texture_cache.UnmapMemory(addr, size); |
| 766 | } | 610 | } |
| 767 | buffer_cache.OnCPUWrite(addr, size); | 611 | { |
| 612 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 613 | buffer_cache.WriteMemory(addr, size); | ||
| 614 | } | ||
| 768 | shader_cache.OnCPUWrite(addr, size); | 615 | shader_cache.OnCPUWrite(addr, size); |
| 769 | } | 616 | } |
| 770 | 617 | ||
| @@ -799,14 +646,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | |||
| 799 | } | 646 | } |
| 800 | 647 | ||
| 801 | void RasterizerOpenGL::WaitForIdle() { | 648 | void RasterizerOpenGL::WaitForIdle() { |
| 802 | // Place a barrier on everything that is not framebuffer related. | 649 | glMemoryBarrier(GL_ALL_BARRIER_BITS); |
| 803 | // This is related to another flag that is not currently implemented. | ||
| 804 | glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT | | ||
| 805 | GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | | ||
| 806 | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | | ||
| 807 | GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT | | ||
| 808 | GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT | | ||
| 809 | GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); | ||
| 810 | } | 650 | } |
| 811 | 651 | ||
| 812 | void RasterizerOpenGL::FragmentBarrier() { | 652 | void RasterizerOpenGL::FragmentBarrier() { |
| @@ -831,18 +671,21 @@ void RasterizerOpenGL::TickFrame() { | |||
| 831 | num_queued_commands = 0; | 671 | num_queued_commands = 0; |
| 832 | 672 | ||
| 833 | fence_manager.TickFrame(); | 673 | fence_manager.TickFrame(); |
| 834 | buffer_cache.TickFrame(); | ||
| 835 | { | 674 | { |
| 836 | auto lock = texture_cache.AcquireLock(); | 675 | std::scoped_lock lock{texture_cache.mutex}; |
| 837 | texture_cache.TickFrame(); | 676 | texture_cache.TickFrame(); |
| 838 | } | 677 | } |
| 678 | { | ||
| 679 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 680 | buffer_cache.TickFrame(); | ||
| 681 | } | ||
| 839 | } | 682 | } |
| 840 | 683 | ||
| 841 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | 684 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 842 | const Tegra::Engines::Fermi2D::Surface& dst, | 685 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 843 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 686 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 844 | MICROPROFILE_SCOPE(OpenGL_Blits); | 687 | MICROPROFILE_SCOPE(OpenGL_Blits); |
| 845 | auto lock = texture_cache.AcquireLock(); | 688 | std::scoped_lock lock{texture_cache.mutex}; |
| 846 | texture_cache.BlitImage(dst, src, copy_config); | 689 | texture_cache.BlitImage(dst, src, copy_config); |
| 847 | return true; | 690 | return true; |
| 848 | } | 691 | } |
| @@ -854,7 +697,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 854 | } | 697 | } |
| 855 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 698 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 856 | 699 | ||
| 857 | auto lock = texture_cache.AcquireLock(); | 700 | std::scoped_lock lock{texture_cache.mutex}; |
| 858 | ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; | 701 | ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; |
| 859 | if (!image_view) { | 702 | if (!image_view) { |
| 860 | return false; | 703 | return false; |
| @@ -921,166 +764,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te | |||
| 921 | } | 764 | } |
| 922 | } | 765 | } |
| 923 | 766 | ||
| 924 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { | ||
| 925 | static constexpr std::array PARAMETER_LUT{ | ||
| 926 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 927 | GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 928 | GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 929 | }; | ||
| 930 | MICROPROFILE_SCOPE(OpenGL_UBO); | ||
| 931 | const auto& stages = maxwell3d.state.shader_stages; | ||
| 932 | const auto& shader_stage = stages[stage_index]; | ||
| 933 | const auto& entries = shader->GetEntries(); | ||
| 934 | const bool use_unified = entries.use_unified_uniforms; | ||
| 935 | const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE; | ||
| 936 | |||
| 937 | const auto base_bindings = device.GetBaseBindings(stage_index); | ||
| 938 | u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer; | ||
| 939 | for (const auto& entry : entries.const_buffers) { | ||
| 940 | const u32 index = entry.GetIndex(); | ||
| 941 | const auto& buffer = shader_stage.const_buffers[index]; | ||
| 942 | SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified, | ||
| 943 | base_unified_offset + index * Maxwell::MaxConstBufferSize); | ||
| 944 | ++binding; | ||
| 945 | } | ||
| 946 | if (use_unified) { | ||
| 947 | const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer + | ||
| 948 | entries.global_memory_entries.size()); | ||
| 949 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, | ||
| 950 | base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE); | ||
| 951 | } | ||
| 952 | } | ||
| 953 | |||
| 954 | void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) { | ||
| 955 | MICROPROFILE_SCOPE(OpenGL_UBO); | ||
| 956 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 957 | const auto& entries = kernel->GetEntries(); | ||
| 958 | const bool use_unified = entries.use_unified_uniforms; | ||
| 959 | |||
| 960 | u32 binding = 0; | ||
| 961 | for (const auto& entry : entries.const_buffers) { | ||
| 962 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 963 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | ||
| 964 | Tegra::Engines::ConstBufferInfo buffer; | ||
| 965 | buffer.address = config.Address(); | ||
| 966 | buffer.size = config.size; | ||
| 967 | buffer.enabled = mask[entry.GetIndex()]; | ||
| 968 | SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry, | ||
| 969 | use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize); | ||
| 970 | ++binding; | ||
| 971 | } | ||
| 972 | if (use_unified) { | ||
| 973 | const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size()); | ||
| 974 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0, | ||
| 975 | NUM_CONST_BUFFERS_BYTES_PER_STAGE); | ||
| 976 | } | ||
| 977 | } | ||
| 978 | |||
| 979 | void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, | ||
| 980 | const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 981 | const ConstBufferEntry& entry, bool use_unified, | ||
| 982 | std::size_t unified_offset) { | ||
| 983 | if (!buffer.enabled) { | ||
| 984 | // Set values to zero to unbind buffers | ||
| 985 | if (device.UseAssemblyShaders()) { | ||
| 986 | glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); | ||
| 987 | } else { | ||
| 988 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); | ||
| 989 | } | ||
| 990 | return; | ||
| 991 | } | ||
| 992 | |||
| 993 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 | ||
| 994 | // UBO alignment requirements. | ||
| 995 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); | ||
| 996 | |||
| 997 | const bool fast_upload = !use_unified && device.HasFastBufferSubData(); | ||
| 998 | |||
| 999 | const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); | ||
| 1000 | const GPUVAddr gpu_addr = buffer.address; | ||
| 1001 | auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); | ||
| 1002 | |||
| 1003 | if (device.UseAssemblyShaders()) { | ||
| 1004 | UNIMPLEMENTED_IF(use_unified); | ||
| 1005 | if (info.offset != 0) { | ||
| 1006 | const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; | ||
| 1007 | glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); | ||
| 1008 | info.handle = staging_cbuf; | ||
| 1009 | info.offset = 0; | ||
| 1010 | } | ||
| 1011 | glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); | ||
| 1012 | return; | ||
| 1013 | } | ||
| 1014 | |||
| 1015 | if (use_unified) { | ||
| 1016 | glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, | ||
| 1017 | unified_offset, size); | ||
| 1018 | } else { | ||
| 1019 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); | ||
| 1020 | } | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { | ||
| 1024 | static constexpr std::array TARGET_LUT = { | ||
| 1025 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, | ||
| 1026 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | ||
| 1027 | }; | ||
| 1028 | const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; | ||
| 1029 | const auto& entries{shader->GetEntries().global_memory_entries}; | ||
| 1030 | |||
| 1031 | std::array<BindlessSSBO, 32> ssbos; | ||
| 1032 | ASSERT(entries.size() < ssbos.size()); | ||
| 1033 | |||
| 1034 | const bool assembly_shaders = device.UseAssemblyShaders(); | ||
| 1035 | u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; | ||
| 1036 | for (const auto& entry : entries) { | ||
| 1037 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; | ||
| 1038 | const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | ||
| 1039 | const u32 size{gpu_memory.Read<u32>(addr + 8)}; | ||
| 1040 | SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); | ||
| 1041 | ++binding; | ||
| 1042 | } | ||
| 1043 | if (assembly_shaders) { | ||
| 1044 | UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size()); | ||
| 1045 | } | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { | ||
| 1049 | const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; | ||
| 1050 | const auto& entries{kernel->GetEntries().global_memory_entries}; | ||
| 1051 | |||
| 1052 | std::array<BindlessSSBO, 32> ssbos; | ||
| 1053 | ASSERT(entries.size() < ssbos.size()); | ||
| 1054 | |||
| 1055 | u32 binding = 0; | ||
| 1056 | for (const auto& entry : entries) { | ||
| 1057 | const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; | ||
| 1058 | const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | ||
| 1059 | const u32 size{gpu_memory.Read<u32>(addr + 8)}; | ||
| 1060 | SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); | ||
| 1061 | ++binding; | ||
| 1062 | } | ||
| 1063 | if (device.UseAssemblyShaders()) { | ||
| 1064 | UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size()); | ||
| 1065 | } | ||
| 1066 | } | ||
| 1067 | |||
| 1068 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, | ||
| 1069 | GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) { | ||
| 1070 | const size_t alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 1071 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); | ||
| 1072 | if (device.UseAssemblyShaders()) { | ||
| 1073 | *ssbo = BindlessSSBO{ | ||
| 1074 | .address = static_cast<GLuint64EXT>(info.address + info.offset), | ||
| 1075 | .length = static_cast<GLsizei>(size), | ||
| 1076 | .padding = 0, | ||
| 1077 | }; | ||
| 1078 | } else { | ||
| 1079 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, | ||
| 1080 | static_cast<GLsizeiptr>(size)); | ||
| 1081 | } | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { | 767 | void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { |
| 1085 | const bool via_header_index = | 768 | const bool via_header_index = |
| 1086 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 769 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
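All of the removed SetupDrawConstBuffers / SetupComputeConstBuffers / Setup*GlobalMemory plumbing is superseded by the unified buffer cache; the header diff below adds a BindGraphicsUniformBuffer override for the same job. The override's body is not part of this diff, so the following is only a guess at its shape, with a hypothetical cache method standing in for the real one:

    #include <cstddef>
    #include <cstdint>
    #include <mutex>

    using GPUVAddr = std::uint64_t;

    // Hypothetical stand-in for the unified buffer cache interface
    // (the real class lives in buffer_cache/buffer_cache.h and is not quoted here).
    struct FakeBufferCache {
        std::mutex mutex;
        void BindGraphicsUniformBuffer(std::size_t stage, std::uint32_t index, GPUVAddr gpu_addr,
                                       std::uint32_t size) { /* remember the binding for the next draw */ }
    };

    // Plausible shape of RasterizerOpenGL::BindGraphicsUniformBuffer: forward the
    // guest constant-buffer binding into the cache under its lock.
    void BindGraphicsUniformBuffer(FakeBufferCache& buffer_cache, std::size_t stage,
                                   std::uint32_t index, GPUVAddr gpu_addr, std::uint32_t size) {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
    }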
| @@ -1128,6 +811,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { | |||
| 1128 | } | 811 | } |
| 1129 | } | 812 | } |
| 1130 | 813 | ||
| 814 | void RasterizerOpenGL::SyncState() { | ||
| 815 | SyncViewport(); | ||
| 816 | SyncRasterizeEnable(); | ||
| 817 | SyncPolygonModes(); | ||
| 818 | SyncColorMask(); | ||
| 819 | SyncFragmentColorClampState(); | ||
| 820 | SyncMultiSampleState(); | ||
| 821 | SyncDepthTestState(); | ||
| 822 | SyncDepthClamp(); | ||
| 823 | SyncStencilTestState(); | ||
| 824 | SyncBlendState(); | ||
| 825 | SyncLogicOpState(); | ||
| 826 | SyncCullMode(); | ||
| 827 | SyncPrimitiveRestart(); | ||
| 828 | SyncScissorTest(); | ||
| 829 | SyncPointState(); | ||
| 830 | SyncLineState(); | ||
| 831 | SyncPolygonOffset(); | ||
| 832 | SyncAlphaTest(); | ||
| 833 | SyncFramebufferSRGB(); | ||
| 834 | SyncVertexFormats(); | ||
| 835 | SyncVertexInstances(); | ||
| 836 | } | ||
| 837 | |||
| 1131 | void RasterizerOpenGL::SyncViewport() { | 838 | void RasterizerOpenGL::SyncViewport() { |
| 1132 | auto& flags = maxwell3d.dirty.flags; | 839 | auto& flags = maxwell3d.dirty.flags; |
| 1133 | const auto& regs = maxwell3d.regs; | 840 | const auto& regs = maxwell3d.regs; |
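The new SyncState() simply fans out to the individual Sync* helpers; each helper stays cheap when nothing changed because it checks and clears its Maxwell3D dirty flag before touching GL state, as SyncViewport does with maxwell3d.dirty.flags. A small self-contained sketch of that dirty-flag pattern, with invented flag names:

    #include <bitset>
    #include <cstddef>

    // Simplified stand-in for Maxwell3D's dirty-flag table.
    enum DirtyFlag : std::size_t { Viewports, ColorMasks, NumFlags };
    using DirtyFlags = std::bitset<NumFlags>;

    // Each Sync* helper bails out early when its flag is clear, so calling
    // SyncState() on every draw costs almost nothing for unchanged state.
    void SyncViewports(DirtyFlags& flags) {
        if (!flags[Viewports]) {
            return;
        }
        flags[Viewports] = false;
        // ... re-emit glViewportIndexedf / glDepthRangeIndexed calls here ...
    }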
| @@ -1163,9 +870,11 @@ void RasterizerOpenGL::SyncViewport() { | |||
| 1163 | if (regs.screen_y_control.y_negate != 0) { | 870 | if (regs.screen_y_control.y_negate != 0) { |
| 1164 | flip_y = !flip_y; | 871 | flip_y = !flip_y; |
| 1165 | } | 872 | } |
| 1166 | glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT, | 873 | const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne; |
| 1167 | regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE | 874 | const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; |
| 1168 | : GL_NEGATIVE_ONE_TO_ONE); | 875 | const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE; |
| 876 | state_tracker.ClipControl(origin, depth); | ||
| 877 | state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0); | ||
| 1169 | } | 878 | } |
| 1170 | 879 | ||
| 1171 | if (dirty_viewport) { | 880 | if (dirty_viewport) { |
| @@ -1649,36 +1358,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | |||
| 1649 | if (regs.tfb_enabled == 0) { | 1358 | if (regs.tfb_enabled == 0) { |
| 1650 | return; | 1359 | return; |
| 1651 | } | 1360 | } |
| 1652 | |||
| 1653 | if (device.UseAssemblyShaders()) { | 1361 | if (device.UseAssemblyShaders()) { |
| 1654 | SyncTransformFeedback(); | 1362 | SyncTransformFeedback(); |
| 1655 | } | 1363 | } |
| 1656 | |||
| 1657 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | 1364 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || |
| 1658 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | 1365 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || |
| 1659 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | 1366 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); |
| 1660 | 1367 | UNIMPLEMENTED_IF(primitive_mode != GL_POINTS); | |
| 1661 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||
| 1662 | const auto& binding = regs.tfb_bindings[index]; | ||
| 1663 | if (!binding.buffer_enable) { | ||
| 1664 | if (enabled_transform_feedback_buffers[index]) { | ||
| 1665 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0, | ||
| 1666 | 0); | ||
| 1667 | } | ||
| 1668 | enabled_transform_feedback_buffers[index] = false; | ||
| 1669 | continue; | ||
| 1670 | } | ||
| 1671 | enabled_transform_feedback_buffers[index] = true; | ||
| 1672 | |||
| 1673 | auto& tfb_buffer = transform_feedback_buffers[index]; | ||
| 1674 | tfb_buffer.Create(); | ||
| 1675 | |||
| 1676 | const GLuint handle = tfb_buffer.handle; | ||
| 1677 | const std::size_t size = binding.buffer_size; | ||
| 1678 | glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY); | ||
| 1679 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0, | ||
| 1680 | static_cast<GLsizeiptr>(size)); | ||
| 1681 | } | ||
| 1682 | 1368 | ||
| 1683 | // We may have to call BeginTransformFeedbackNV here since they seem to call different | 1369 | // We may have to call BeginTransformFeedbackNV here since they seem to call different |
| 1684 | // implementations on Nvidia's driver (the pointer is different) but we are using | 1370 | // implementations on Nvidia's driver (the pointer is different) but we are using |
| @@ -1692,23 +1378,7 @@ void RasterizerOpenGL::EndTransformFeedback() { | |||
| 1692 | if (regs.tfb_enabled == 0) { | 1378 | if (regs.tfb_enabled == 0) { |
| 1693 | return; | 1379 | return; |
| 1694 | } | 1380 | } |
| 1695 | |||
| 1696 | glEndTransformFeedback(); | 1381 | glEndTransformFeedback(); |
| 1697 | |||
| 1698 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||
| 1699 | const auto& binding = regs.tfb_bindings[index]; | ||
| 1700 | if (!binding.buffer_enable) { | ||
| 1701 | continue; | ||
| 1702 | } | ||
| 1703 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 1704 | |||
| 1705 | const GLuint handle = transform_feedback_buffers[index].handle; | ||
| 1706 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 1707 | const std::size_t size = binding.buffer_size; | ||
| 1708 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 1709 | glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, | ||
| 1710 | static_cast<GLsizeiptr>(size)); | ||
| 1711 | } | ||
| 1712 | } | 1382 | } |
| 1713 | 1383 | ||
| 1714 | } // namespace OpenGL | 1384 | } // namespace OpenGL |
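With this change BeginTransformFeedback no longer orphans and binds a per-index GL_TRANSFORM_FEEDBACK_BUFFER, and EndTransformFeedback no longer copies results back through the old buffer cache; presumably the unified buffer cache now owns that storage and binds it before the draw, though that code is outside this diff. Under that assumption, the GL side of such a binding reduces to:

    #include <glad/glad.h>

    // Hypothetical helper: bind a cache-owned buffer range as transform feedback
    // storage for binding point `index` (requires a current GL context).
    void BindTransformFeedbackBuffer(GLuint index, GLuint cache_buffer, GLintptr offset,
                                     GLsizeiptr size) {
        glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, cache_buffer, offset, size);
    }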
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 82e03e677..31d69a94c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -30,7 +30,6 @@ | |||
| 30 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 30 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 31 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 32 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 32 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 33 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||
| 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 33 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 35 | #include "video_core/shader/async_shaders.h" | 34 | #include "video_core/shader/async_shaders.h" |
| 36 | #include "video_core/textures/texture.h" | 35 | #include "video_core/textures/texture.h" |
| @@ -47,6 +46,11 @@ namespace Tegra { | |||
| 47 | class MemoryManager; | 46 | class MemoryManager; |
| 48 | } | 47 | } |
| 49 | 48 | ||
| 49 | namespace Vulkan { | ||
| 50 | class Device; | ||
| 51 | class MemoryAllocator; | ||
| 52 | } // namespace Vulkan | ||
| 53 | |||
| 50 | namespace OpenGL { | 54 | namespace OpenGL { |
| 51 | 55 | ||
| 52 | struct ScreenInfo; | 56 | struct ScreenInfo; |
| @@ -63,6 +67,8 @@ class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | |||
| 63 | public: | 67 | public: |
| 64 | explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 68 | explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 65 | Core::Memory::Memory& cpu_memory_, const Device& device_, | 69 | Core::Memory::Memory& cpu_memory_, const Device& device_, |
| 70 | const Vulkan::Device* vulkan_device, | ||
| 71 | Vulkan::MemoryAllocator* vulkan_memory_allocator, | ||
| 66 | ScreenInfo& screen_info_, ProgramManager& program_manager_, | 72 | ScreenInfo& screen_info_, ProgramManager& program_manager_, |
| 67 | StateTracker& state_tracker_); | 73 | StateTracker& state_tracker_); |
| 68 | ~RasterizerOpenGL() override; | 74 | ~RasterizerOpenGL() override; |
| @@ -72,6 +78,7 @@ public: | |||
| 72 | void DispatchCompute(GPUVAddr code_addr) override; | 78 | void DispatchCompute(GPUVAddr code_addr) override; |
| 73 | void ResetCounter(VideoCore::QueryType type) override; | 79 | void ResetCounter(VideoCore::QueryType type) override; |
| 74 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 80 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 81 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||
| 75 | void FlushAll() override; | 82 | void FlushAll() override; |
| 76 | void FlushRegion(VAddr addr, u64 size) override; | 83 | void FlushRegion(VAddr addr, u64 size) override; |
| 77 | bool MustFlushRegion(VAddr addr, u64 size) override; | 84 | bool MustFlushRegion(VAddr addr, u64 size) override; |
| @@ -119,27 +126,6 @@ private: | |||
| 119 | void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, | 126 | void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, |
| 120 | size_t& image_view_index, size_t& texture_index, size_t& image_index); | 127 | size_t& image_view_index, size_t& texture_index, size_t& image_index); |
| 121 | 128 | ||
| 122 | /// Configures the current constbuffers to use for the draw command. | ||
| 123 | void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); | ||
| 124 | |||
| 125 | /// Configures the current constbuffers to use for the kernel invocation. | ||
| 126 | void SetupComputeConstBuffers(Shader* kernel); | ||
| 127 | |||
| 128 | /// Configures a constant buffer. | ||
| 129 | void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 130 | const ConstBufferEntry& entry, bool use_unified, | ||
| 131 | std::size_t unified_offset); | ||
| 132 | |||
| 133 | /// Configures the current global memory entries to use for the draw command. | ||
| 134 | void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader); | ||
| 135 | |||
| 136 | /// Configures the current global memory entries to use for the kernel invocation. | ||
| 137 | void SetupComputeGlobalMemory(Shader* kernel); | ||
| 138 | |||
| 139 | /// Configures a global memory buffer. | ||
| 140 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||
| 141 | size_t size, BindlessSSBO* ssbo); | ||
| 142 | |||
| 143 | /// Configures the current textures to use for the draw command. | 129 | /// Configures the current textures to use for the draw command. |
| 144 | void SetupDrawTextures(const Shader* shader, size_t stage_index); | 130 | void SetupDrawTextures(const Shader* shader, size_t stage_index); |
| 145 | 131 | ||
| @@ -152,6 +138,9 @@ private: | |||
| 152 | /// Configures images in a compute shader. | 138 | /// Configures images in a compute shader. |
| 153 | void SetupComputeImages(const Shader* shader); | 139 | void SetupComputeImages(const Shader* shader); |
| 154 | 140 | ||
| 141 | /// Syncs state to match guest's | ||
| 142 | void SyncState(); | ||
| 143 | |||
| 155 | /// Syncs the viewport and depth range to match the guest state | 144 | /// Syncs the viewport and depth range to match the guest state |
| 156 | void SyncViewport(); | 145 | void SyncViewport(); |
| 157 | 146 | ||
| @@ -215,6 +204,12 @@ private: | |||
| 215 | /// Syncs the framebuffer sRGB state to match the guest state | 204 | /// Syncs the framebuffer sRGB state to match the guest state |
| 216 | void SyncFramebufferSRGB(); | 205 | void SyncFramebufferSRGB(); |
| 217 | 206 | ||
| 207 | /// Syncs vertex formats to match the guest state | ||
| 208 | void SyncVertexFormats(); | ||
| 209 | |||
| 210 | /// Syncs vertex instances to match the guest state | ||
| 211 | void SyncVertexInstances(); | ||
| 212 | |||
| 218 | /// Syncs transform feedback state to match guest state | 213 | /// Syncs transform feedback state to match guest state |
| 219 | /// @note Only valid on assembly shaders | 214 | /// @note Only valid on assembly shaders |
| 220 | void SyncTransformFeedback(); | 215 | void SyncTransformFeedback(); |
| @@ -225,19 +220,7 @@ private: | |||
| 225 | /// End a transform feedback | 220 | /// End a transform feedback |
| 226 | void EndTransformFeedback(); | 221 | void EndTransformFeedback(); |
| 227 | 222 | ||
| 228 | std::size_t CalculateVertexArraysSize() const; | 223 | void SetupShaders(bool is_indexed); |
| 229 | |||
| 230 | std::size_t CalculateIndexBufferSize() const; | ||
| 231 | |||
| 232 | /// Updates the current vertex format | ||
| 233 | void SetupVertexFormat(); | ||
| 234 | |||
| 235 | void SetupVertexBuffer(); | ||
| 236 | void SetupVertexInstances(); | ||
| 237 | |||
| 238 | GLintptr SetupIndexBuffer(); | ||
| 239 | |||
| 240 | void SetupShaders(); | ||
| 241 | 224 | ||
| 242 | Tegra::GPU& gpu; | 225 | Tegra::GPU& gpu; |
| 243 | Tegra::Engines::Maxwell3D& maxwell3d; | 226 | Tegra::Engines::Maxwell3D& maxwell3d; |
| @@ -249,12 +232,12 @@ private: | |||
| 249 | ProgramManager& program_manager; | 232 | ProgramManager& program_manager; |
| 250 | StateTracker& state_tracker; | 233 | StateTracker& state_tracker; |
| 251 | 234 | ||
| 252 | OGLStreamBuffer stream_buffer; | ||
| 253 | TextureCacheRuntime texture_cache_runtime; | 235 | TextureCacheRuntime texture_cache_runtime; |
| 254 | TextureCache texture_cache; | 236 | TextureCache texture_cache; |
| 237 | BufferCacheRuntime buffer_cache_runtime; | ||
| 238 | BufferCache buffer_cache; | ||
| 255 | ShaderCacheOpenGL shader_cache; | 239 | ShaderCacheOpenGL shader_cache; |
| 256 | QueryCache query_cache; | 240 | QueryCache query_cache; |
| 257 | OGLBufferCache buffer_cache; | ||
| 258 | FenceManagerOpenGL fence_manager; | 241 | FenceManagerOpenGL fence_manager; |
| 259 | 242 | ||
| 260 | VideoCommon::Shader::AsyncShaders async_shaders; | 243 | VideoCommon::Shader::AsyncShaders async_shaders; |
| @@ -262,20 +245,8 @@ private: | |||
| 262 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | 245 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 263 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | 246 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |
| 264 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; | 247 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; |
| 265 | std::array<GLuint, MAX_TEXTURES> texture_handles; | 248 | std::array<GLuint, MAX_TEXTURES> texture_handles{}; |
| 266 | std::array<GLuint, MAX_IMAGES> image_handles; | 249 | std::array<GLuint, MAX_IMAGES> image_handles{}; |
| 267 | |||
| 268 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 269 | transform_feedback_buffers; | ||
| 270 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 271 | enabled_transform_feedback_buffers; | ||
| 272 | |||
| 273 | static constexpr std::size_t NUM_CONSTANT_BUFFERS = | ||
| 274 | Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | ||
| 275 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | ||
| 276 | std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; | ||
| 277 | std::size_t current_cbuf = 0; | ||
| 278 | OGLBuffer unified_uniform_buffer; | ||
| 279 | 250 | ||
| 280 | /// Number of commands queued to the OpenGL driver. Resetted on flush. | 251 | /// Number of commands queued to the OpenGL driver. Resetted on flush. |
| 281 | std::size_t num_queued_commands = 0; | 252 | std::size_t num_queued_commands = 0; |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 0e34a0f20..3428e5e21 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -171,12 +171,6 @@ void OGLBuffer::Release() { | |||
| 171 | handle = 0; | 171 | handle = 0; |
| 172 | } | 172 | } |
| 173 | 173 | ||
| 174 | void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) { | ||
| 175 | ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; }); | ||
| 176 | |||
| 177 | glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY); | ||
| 178 | } | ||
| 179 | |||
| 180 | void OGLSync::Create() { | 174 | void OGLSync::Create() { |
| 181 | if (handle != 0) | 175 | if (handle != 0) |
| 182 | return; | 176 | return; |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index f48398669..552d79db4 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -234,9 +234,6 @@ public: | |||
| 234 | /// Deletes the internal OpenGL resource | 234 | /// Deletes the internal OpenGL resource |
| 235 | void Release(); | 235 | void Release(); |
| 236 | 236 | ||
| 237 | // Converts the buffer into a stream copy buffer with a fixed size | ||
| 238 | void MakeStreamCopy(std::size_t buffer_size); | ||
| 239 | |||
| 240 | GLuint handle = 0; | 237 | GLuint handle = 0; |
| 241 | }; | 238 | }; |
| 242 | 239 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c35b71b6b..ac78d344c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -64,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument> | |||
| 64 | constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); | 64 | constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); |
| 65 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); | 65 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); |
| 66 | 66 | ||
| 67 | constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt | 67 | constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt |
| 68 | #define ftou floatBitsToUint | 68 | #define ftou floatBitsToUint |
| 69 | #define itof intBitsToFloat | 69 | #define itof intBitsToFloat |
| 70 | #define utof uintBitsToFloat | 70 | #define utof uintBitsToFloat |
| @@ -77,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ | |||
| 77 | 77 | ||
| 78 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | 78 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); |
| 79 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | 79 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); |
| 80 | |||
| 81 | layout (std140, binding = {}) uniform vs_config {{ | ||
| 82 | float y_direction; | ||
| 83 | }}; | ||
| 84 | )"; | 80 | )"; |
| 85 | 81 | ||
| 86 | class ShaderWriter final { | 82 | class ShaderWriter final { |
| @@ -402,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 402 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 398 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 403 | } | 399 | } |
| 404 | 400 | ||
| 405 | bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) { | ||
| 406 | const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size()); | ||
| 407 | // We waste one UBO for emulation | ||
| 408 | const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1; | ||
| 409 | return num_ubos > num_available_ubos; | ||
| 410 | } | ||
| 411 | |||
| 412 | struct GenericVaryingDescription { | 401 | struct GenericVaryingDescription { |
| 413 | std::string name; | 402 | std::string name; |
| 414 | u8 first_element = 0; | 403 | u8 first_element = 0; |
| @@ -420,9 +409,8 @@ public: | |||
| 420 | explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, | 409 | explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, |
| 421 | ShaderType stage_, std::string_view identifier_, | 410 | ShaderType stage_, std::string_view identifier_, |
| 422 | std::string_view suffix_) | 411 | std::string_view suffix_) |
| 423 | : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_}, | 412 | : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, |
| 424 | suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{ | 413 | identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { |
| 425 | UseUnifiedUniforms(device_, ir_, stage_)} { | ||
| 426 | if (stage != ShaderType::Compute) { | 414 | if (stage != ShaderType::Compute) { |
| 427 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | 415 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); |
| 428 | } | 416 | } |
| @@ -516,7 +504,8 @@ private: | |||
| 516 | if (!identifier.empty()) { | 504 | if (!identifier.empty()) { |
| 517 | code.AddLine("// {}", identifier); | 505 | code.AddLine("// {}", identifier); |
| 518 | } | 506 | } |
| 519 | code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core"); | 507 | const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); |
| 508 | code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core"); | ||
| 520 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); | 509 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); |
| 521 | if (device.HasShaderBallot()) { | 510 | if (device.HasShaderBallot()) { |
| 522 | code.AddLine("#extension GL_ARB_shader_ballot : require"); | 511 | code.AddLine("#extension GL_ARB_shader_ballot : require"); |
| @@ -542,7 +531,7 @@ private: | |||
| 542 | 531 | ||
| 543 | code.AddNewLine(); | 532 | code.AddNewLine(); |
| 544 | 533 | ||
| 545 | code.AddLine(CommonDeclarations, EmulationUniformBlockBinding); | 534 | code.AddLine(COMMON_DECLARATIONS); |
| 546 | } | 535 | } |
| 547 | 536 | ||
| 548 | void DeclareVertex() { | 537 | void DeclareVertex() { |
| @@ -865,17 +854,6 @@ private: | |||
| 865 | } | 854 | } |
| 866 | 855 | ||
| 867 | void DeclareConstantBuffers() { | 856 | void DeclareConstantBuffers() { |
| 868 | if (use_unified_uniforms) { | ||
| 869 | const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer + | ||
| 870 | static_cast<u32>(ir.GetGlobalMemory().size()); | ||
| 871 | code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{", | ||
| 872 | binding); | ||
| 873 | code.AddLine(" uint cbufs[];"); | ||
| 874 | code.AddLine("}};"); | ||
| 875 | code.AddNewLine(); | ||
| 876 | return; | ||
| 877 | } | ||
| 878 | |||
| 879 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; | 857 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; |
| 880 | for (const auto& [index, info] : ir.GetConstantBuffers()) { | 858 | for (const auto& [index, info] : ir.GetConstantBuffers()) { |
| 881 | const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); | 859 | const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); |
| @@ -1081,29 +1059,17 @@ private: | |||
| 1081 | 1059 | ||
| 1082 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | 1060 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { |
| 1083 | const Node offset = cbuf->GetOffset(); | 1061 | const Node offset = cbuf->GetOffset(); |
| 1084 | const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS; | ||
| 1085 | 1062 | ||
| 1086 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | 1063 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { |
| 1087 | // Direct access | 1064 | // Direct access |
| 1088 | const u32 offset_imm = immediate->GetValue(); | 1065 | const u32 offset_imm = immediate->GetValue(); |
| 1089 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); | 1066 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); |
| 1090 | if (use_unified_uniforms) { | 1067 | return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), |
| 1091 | return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4), | 1068 | offset_imm / (4 * 4), (offset_imm / 4) % 4), |
| 1092 | Type::Uint}; | ||
| 1093 | } else { | ||
| 1094 | return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), | ||
| 1095 | offset_imm / (4 * 4), (offset_imm / 4) % 4), | ||
| 1096 | Type::Uint}; | ||
| 1097 | } | ||
| 1098 | } | ||
| 1099 | |||
| 1100 | // Indirect access | ||
| 1101 | if (use_unified_uniforms) { | ||
| 1102 | return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset, | ||
| 1103 | Visit(offset).AsUint()), | ||
| 1104 | Type::Uint}; | 1069 | Type::Uint}; |
| 1105 | } | 1070 | } |
| 1106 | 1071 | ||
| 1072 | // Indirect access | ||
| 1107 | const std::string final_offset = code.GenerateTemporary(); | 1073 | const std::string final_offset = code.GenerateTemporary(); |
| 1108 | code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); | 1074 | code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); |
| 1109 | 1075 | ||
| @@ -2293,7 +2259,6 @@ private: | |||
| 2293 | } | 2259 | } |
| 2294 | } | 2260 | } |
| 2295 | } | 2261 | } |
| 2296 | |||
| 2297 | if (header.ps.omap.depth) { | 2262 | if (header.ps.omap.depth) { |
| 2298 | // The depth output is always 2 registers after the last color output, and current_reg | 2263 | // The depth output is always 2 registers after the last color output, and current_reg |
| 2299 | // already contains one past the last color register. | 2264 | // already contains one past the last color register. |
| @@ -2337,7 +2302,8 @@ private: | |||
| 2337 | } | 2302 | } |
| 2338 | 2303 | ||
| 2339 | Expression YNegate(Operation operation) { | 2304 | Expression YNegate(Operation operation) { |
| 2340 | return {"y_direction", Type::Float}; | 2305 | // Y_NEGATE is mapped to this uniform value |
| 2306 | return {"gl_FrontMaterial.ambient.a", Type::Float}; | ||
| 2341 | } | 2307 | } |
| 2342 | 2308 | ||
| 2343 | template <u32 element> | 2309 | template <u32 element> |
| @@ -2787,7 +2753,6 @@ private: | |||
| 2787 | const std::string_view identifier; | 2753 | const std::string_view identifier; |
| 2788 | const std::string_view suffix; | 2754 | const std::string_view suffix; |
| 2789 | const Header header; | 2755 | const Header header; |
| 2790 | const bool use_unified_uniforms; | ||
| 2791 | std::unordered_map<u8, VaryingTFB> transform_feedback; | 2756 | std::unordered_map<u8, VaryingTFB> transform_feedback; |
| 2792 | 2757 | ||
| 2793 | ShaderWriter code; | 2758 | ShaderWriter code; |
| @@ -3003,8 +2968,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s | |||
| 3003 | for (std::size_t i = 0; i < std::size(clip_distances); ++i) { | 2968 | for (std::size_t i = 0; i < std::size(clip_distances); ++i) { |
| 3004 | entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; | 2969 | entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; |
| 3005 | } | 2970 | } |
| 2971 | for (const auto& buffer : entries.const_buffers) { | ||
| 2972 | entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||
| 2973 | } | ||
| 3006 | entries.shader_length = ir.GetLength(); | 2974 | entries.shader_length = ir.GetLength(); |
| 3007 | entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage); | ||
| 3008 | return entries; | 2975 | return entries; |
| 3009 | } | 2976 | } |
| 3010 | 2977 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index be68994bb..0397a000c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -55,7 +55,7 @@ struct ShaderEntries { | |||
| 55 | std::vector<ImageEntry> images; | 55 | std::vector<ImageEntry> images; |
| 56 | std::size_t shader_length{}; | 56 | std::size_t shader_length{}; |
| 57 | u32 clip_distances{}; | 57 | u32 clip_distances{}; |
| 58 | bool use_unified_uniforms{}; | 58 | u32 enabled_uniform_buffers{}; |
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 61 | ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
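ShaderEntries trades the use_unified_uniforms flag for enabled_uniform_buffers, a 32-bit mask with one bit per guest constant-buffer index (set in MakeEntries above via 1U << buffer.GetIndex()). A consumer can walk only the set bits instead of scanning all slots; a self-contained C++20 sketch, with a printf standing in for the real bind:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    // Visit the indices of the uniform buffers a shader actually uses.
    // Bit N of `enabled` is set when guest constant buffer N appears in the shader.
    void ForEachEnabledUniformBuffer(std::uint32_t enabled) {
        while (enabled != 0) {
            const int index = std::countr_zero(enabled);
            enabled &= enabled - 1;                          // clear the lowest set bit
            std::printf("bind constant buffer %d\n", index); // placeholder for the real bind
        }
    }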
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 60e6fa39f..dbdf5230f 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp | |||
| @@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) { | |||
| 36 | FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks); | 36 | FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks); |
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | void SetupDirtyVertexArrays(Tables& tables) { | 39 | void SetupDirtyVertexInstances(Tables& tables) { |
| 40 | static constexpr std::size_t num_array = 3; | ||
| 41 | static constexpr std::size_t instance_base_offset = 3; | 40 | static constexpr std::size_t instance_base_offset = 3; |
| 42 | for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { | 41 | for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { |
| 43 | const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); | 42 | const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); |
| 44 | const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]); | ||
| 45 | |||
| 46 | FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers); | ||
| 47 | FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers); | ||
| 48 | |||
| 49 | const std::size_t instance_array_offset = array_offset + instance_base_offset; | 43 | const std::size_t instance_array_offset = array_offset + instance_base_offset; |
| 50 | tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i); | 44 | tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i); |
| 51 | tables[1][instance_array_offset] = VertexInstances; | 45 | tables[1][instance_array_offset] = VertexInstances; |
| @@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) { | |||
| 217 | StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} { | 211 | StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} { |
| 218 | auto& dirty = gpu.Maxwell3D().dirty; | 212 | auto& dirty = gpu.Maxwell3D().dirty; |
| 219 | auto& tables = dirty.tables; | 213 | auto& tables = dirty.tables; |
| 220 | SetupDirtyRenderTargets(tables); | 214 | SetupDirtyFlags(tables); |
| 221 | SetupDirtyColorMasks(tables); | 215 | SetupDirtyColorMasks(tables); |
| 222 | SetupDirtyViewports(tables); | 216 | SetupDirtyViewports(tables); |
| 223 | SetupDirtyScissors(tables); | 217 | SetupDirtyScissors(tables); |
| 224 | SetupDirtyVertexArrays(tables); | 218 | SetupDirtyVertexInstances(tables); |
| 225 | SetupDirtyVertexFormat(tables); | 219 | SetupDirtyVertexFormat(tables); |
| 226 | SetupDirtyShaders(tables); | 220 | SetupDirtyShaders(tables); |
| 227 | SetupDirtyPolygonModes(tables); | 221 | SetupDirtyPolygonModes(tables); |
| @@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} | |||
| 241 | SetupDirtyClipControl(tables); | 235 | SetupDirtyClipControl(tables); |
| 242 | SetupDirtyDepthClampEnabled(tables); | 236 | SetupDirtyDepthClampEnabled(tables); |
| 243 | SetupDirtyMisc(tables); | 237 | SetupDirtyMisc(tables); |
| 244 | |||
| 245 | auto& store = dirty.on_write_stores; | ||
| 246 | store[VertexBuffers] = true; | ||
| 247 | for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { | ||
| 248 | store[VertexBuffer0 + i] = true; | ||
| 249 | } | ||
| 250 | } | ||
| 251 | |||
| 252 | void StateTracker::InvalidateStreamBuffer() { | ||
| 253 | flags[Dirty::VertexBuffers] = true; | ||
| 254 | for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { | ||
| 255 | flags[index] = true; | ||
| 256 | } | ||
| 257 | } | 238 | } |
| 258 | 239 | ||
| 259 | } // namespace OpenGL | 240 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 574615d3c..94c905116 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h | |||
| @@ -28,10 +28,6 @@ enum : u8 { | |||
| 28 | VertexFormat0, | 28 | VertexFormat0, |
| 29 | VertexFormat31 = VertexFormat0 + 31, | 29 | VertexFormat31 = VertexFormat0 + 31, |
| 30 | 30 | ||
| 31 | VertexBuffers, | ||
| 32 | VertexBuffer0, | ||
| 33 | VertexBuffer31 = VertexBuffer0 + 31, | ||
| 34 | |||
| 35 | VertexInstances, | 31 | VertexInstances, |
| 36 | VertexInstance0, | 32 | VertexInstance0, |
| 37 | VertexInstance31 = VertexInstance0 + 31, | 33 | VertexInstance31 = VertexInstance0 + 31, |
| @@ -92,8 +88,6 @@ class StateTracker { | |||
| 92 | public: | 88 | public: |
| 93 | explicit StateTracker(Tegra::GPU& gpu); | 89 | explicit StateTracker(Tegra::GPU& gpu); |
| 94 | 90 | ||
| 95 | void InvalidateStreamBuffer(); | ||
| 96 | |||
| 97 | void BindIndexBuffer(GLuint new_index_buffer) { | 91 | void BindIndexBuffer(GLuint new_index_buffer) { |
| 98 | if (index_buffer == new_index_buffer) { | 92 | if (index_buffer == new_index_buffer) { |
| 99 | return; | 93 | return; |
| @@ -110,13 +104,32 @@ public: | |||
| 110 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); | 104 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); |
| 111 | } | 105 | } |
| 112 | 106 | ||
| 107 | void ClipControl(GLenum new_origin, GLenum new_depth) { | ||
| 108 | if (new_origin == origin && new_depth == depth) { | ||
| 109 | return; | ||
| 110 | } | ||
| 111 | origin = new_origin; | ||
| 112 | depth = new_depth; | ||
| 113 | glClipControl(origin, depth); | ||
| 114 | } | ||
| 115 | |||
| 116 | void SetYNegate(bool new_y_negate) { | ||
| 117 | if (new_y_negate == y_negate) { | ||
| 118 | return; | ||
| 119 | } | ||
| 120 | // Y_NEGATE is mapped to gl_FrontMaterial.ambient.a | ||
| 121 | y_negate = new_y_negate; | ||
| 122 | const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f}; | ||
| 123 | glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data()); | ||
| 124 | } | ||
| 125 | |||
| 113 | void NotifyScreenDrawVertexArray() { | 126 | void NotifyScreenDrawVertexArray() { |
| 114 | flags[OpenGL::Dirty::VertexFormats] = true; | 127 | flags[OpenGL::Dirty::VertexFormats] = true; |
| 115 | flags[OpenGL::Dirty::VertexFormat0 + 0] = true; | 128 | flags[OpenGL::Dirty::VertexFormat0 + 0] = true; |
| 116 | flags[OpenGL::Dirty::VertexFormat0 + 1] = true; | 129 | flags[OpenGL::Dirty::VertexFormat0 + 1] = true; |
| 117 | 130 | ||
| 118 | flags[OpenGL::Dirty::VertexBuffers] = true; | 131 | flags[VideoCommon::Dirty::VertexBuffers] = true; |
| 119 | flags[OpenGL::Dirty::VertexBuffer0] = true; | 132 | flags[VideoCommon::Dirty::VertexBuffer0] = true; |
| 120 | 133 | ||
| 121 | flags[OpenGL::Dirty::VertexInstances] = true; | 134 | flags[OpenGL::Dirty::VertexInstances] = true; |
| 122 | flags[OpenGL::Dirty::VertexInstance0 + 0] = true; | 135 | flags[OpenGL::Dirty::VertexInstance0 + 0] = true; |
| @@ -202,6 +215,9 @@ private: | |||
| 202 | 215 | ||
| 203 | GLuint framebuffer = 0; | 216 | GLuint framebuffer = 0; |
| 204 | GLuint index_buffer = 0; | 217 | GLuint index_buffer = 0; |
| 218 | GLenum origin = GL_LOWER_LEFT; | ||
| 219 | GLenum depth = GL_NEGATIVE_ONE_TO_ONE; | ||
| 220 | bool y_negate = false; | ||
| 205 | }; | 221 | }; |
| 206 | 222 | ||
| 207 | } // namespace OpenGL | 223 | } // namespace OpenGL |
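SetYNegate above and the decompiler's new YNegate() implementation replace the old per-stage vs_config emulation UBO: the host encodes the Y_NEGATE factor in the legacy front material's ambient alpha via glMaterialfv, and the generated GLSL reads it back as gl_FrontMaterial.ambient.a, which is also why shaders that use it now compile under "#version 440 compatibility". A condensed sketch of the two halves (the GL call is real; the GLSL use in the comment is only illustrative):

    #include <array>
    #include <glad/glad.h>

    // Host side: smuggle Y_NEGATE as +1.0 / -1.0 through fixed-function material state.
    void SetYNegate(bool y_negate) {
        const std::array<GLfloat, 4> ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
        glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
    }

    // Shader side (illustrative): the decompiler emits reads such as
    //     float y_direction = gl_FrontMaterial.ambient.a;
    // wherever the guest shader consumed Y_NEGATE; the compatibility profile is
    // required for gl_FrontMaterial to exist at all.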
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index e0819cdf2..bfb992a79 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -1,70 +1,64 @@ | |||
| 1 | // Copyright 2018 Citra Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <tuple> | 5 | #include <array> |
| 6 | #include <vector> | 6 | #include <memory> |
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include <glad/glad.h> | ||
| 7 | 10 | ||
| 8 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 9 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 10 | #include "common/microprofile.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 14 | 14 | ||
| 15 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | ||
| 16 | MP_RGB(128, 128, 192)); | ||
| 17 | |||
| 18 | namespace OpenGL { | 15 | namespace OpenGL { |
| 19 | 16 | ||
| 20 | OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) | 17 | StreamBuffer::StreamBuffer() { |
| 21 | : state_tracker{state_tracker_} { | 18 | static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; |
| 22 | gl_buffer.Create(); | 19 | buffer.Create(); |
| 23 | 20 | glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); | |
| 24 | static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; | 21 | glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); |
| 25 | glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags); | 22 | mapped_pointer = |
| 26 | mapped_ptr = static_cast<u8*>( | 23 | static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); |
| 27 | glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); | 24 | for (OGLSync& sync : fences) { |
| 28 | 25 | sync.Create(); | |
| 29 | if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { | ||
| 30 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); | ||
| 31 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); | ||
| 32 | } | 26 | } |
| 33 | } | 27 | } |
| 34 | 28 | ||
| 35 | OGLStreamBuffer::~OGLStreamBuffer() { | 29 | std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { |
| 36 | glUnmapNamedBuffer(gl_buffer.handle); | 30 | ASSERT(size < REGION_SIZE); |
| 37 | gl_buffer.Release(); | 31 | for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; |
| 38 | } | 32 | ++region) { |
| 39 | 33 | fences[region].Create(); | |
| 40 | std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { | ||
| 41 | ASSERT(size <= BUFFER_SIZE); | ||
| 42 | ASSERT(alignment <= BUFFER_SIZE); | ||
| 43 | mapped_size = size; | ||
| 44 | |||
| 45 | if (alignment > 0) { | ||
| 46 | buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); | ||
| 47 | } | 34 | } |
| 35 | used_iterator = iterator; | ||
| 48 | 36 | ||
| 49 | if (buffer_pos + size > BUFFER_SIZE) { | 37 | for (size_t region = Region(free_iterator) + 1, |
| 50 | MICROPROFILE_SCOPE(OpenGL_StreamBuffer); | 38 | region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); |
| 51 | glInvalidateBufferData(gl_buffer.handle); | 39 | region < region_end; ++region) { |
| 52 | state_tracker.InvalidateStreamBuffer(); | 40 | glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); |
| 53 | 41 | fences[region].Release(); | |
| 54 | buffer_pos = 0; | ||
| 55 | } | 42 | } |
| 56 | 43 | if (iterator + size > free_iterator) { | |
| 57 | return std::make_pair(mapped_ptr + buffer_pos, buffer_pos); | 44 | free_iterator = iterator + size; |
| 58 | } | ||
| 59 | |||
| 60 | void OGLStreamBuffer::Unmap(GLsizeiptr size) { | ||
| 61 | ASSERT(size <= mapped_size); | ||
| 62 | |||
| 63 | if (size > 0) { | ||
| 64 | glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size); | ||
| 65 | } | 45 | } |
| 66 | 46 | if (iterator + size > STREAM_BUFFER_SIZE) { | |
| 67 | buffer_pos += size; | 47 | for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { |
| 48 | fences[region].Create(); | ||
| 49 | } | ||
| 50 | used_iterator = 0; | ||
| 51 | iterator = 0; | ||
| 52 | free_iterator = size; | ||
| 53 | |||
| 54 | for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { | ||
| 55 | glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); | ||
| 56 | fences[region].Release(); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | const size_t offset = iterator; | ||
| 60 | iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); | ||
| 61 | return {std::span(mapped_pointer + offset, size), offset}; | ||
| 68 | } | 62 | } |
| 69 | 63 | ||
| 70 | } // namespace OpenGL | 64 | } // namespace OpenGL |
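The rewritten StreamBuffer keeps a persistently, coherently mapped ring divided into fixed-size regions, each guarded by one OGLSync fence. Request() creates fences for the regions fully written since the previous call, client-waits on (and releases) the fences of any regions the new allocation reaches beyond free_iterator, and on wrap-around fences the remaining tail before waiting on the head regions it is about to reuse. The region bookkeeping is plain integer division; with the constants from the header below (64 MiB buffer, 16 regions of 4 MiB):

    // Region(offset) = offset / REGION_SIZE, with REGION_SIZE = 64 MiB / 16 = 4 MiB.
    static_assert((7ull << 20) / (4ull << 20) == 1);    // offset  7 MiB -> region 1
    static_assert((8ull << 20) / (4ull << 20) == 2);    // offset  8 MiB -> region 2
    static_assert((63ull << 20) / (4ull << 20) == 15);  // offset 63 MiB -> region 15 (the last)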
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index dd9cf67eb..6dbb6bfba 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -1,9 +1,12 @@ | |||
| 1 | // Copyright 2018 Citra Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <span> | ||
| 7 | #include <utility> | 10 | #include <utility> |
| 8 | 11 | ||
| 9 | #include <glad/glad.h> | 12 | #include <glad/glad.h> |
| @@ -13,48 +16,35 @@ | |||
| 13 | 16 | ||
| 14 | namespace OpenGL { | 17 | namespace OpenGL { |
| 15 | 18 | ||
| 16 | class Device; | 19 | class StreamBuffer { |
| 17 | class StateTracker; | 20 | static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024; |
| 21 | static constexpr size_t NUM_SYNCS = 16; | ||
| 22 | static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS; | ||
| 23 | static constexpr size_t MAX_ALIGNMENT = 256; | ||
| 24 | static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0); | ||
| 25 | static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0); | ||
| 26 | static_assert(REGION_SIZE % MAX_ALIGNMENT == 0); | ||
| 18 | 27 | ||
| 19 | class OGLStreamBuffer : private NonCopyable { | ||
| 20 | public: | 28 | public: |
| 21 | explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_); | 29 | explicit StreamBuffer(); |
| 22 | ~OGLStreamBuffer(); | ||
| 23 | |||
| 24 | /* | ||
| 25 | * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes | ||
| 26 | * and the optional alignment requirement. | ||
| 27 | * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. | ||
| 28 | * The return values are the pointer to the new chunk, and the offset within the buffer. | ||
| 29 | * The actual used size must be specified on unmapping the chunk. | ||
| 30 | */ | ||
| 31 | std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0); | ||
| 32 | |||
| 33 | void Unmap(GLsizeiptr size); | ||
| 34 | |||
| 35 | GLuint Handle() const { | ||
| 36 | return gl_buffer.handle; | ||
| 37 | } | ||
| 38 | 30 | ||
| 39 | u64 Address() const { | 31 | [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept; |
| 40 | return gpu_address; | ||
| 41 | } | ||
| 42 | 32 | ||
| 43 | GLsizeiptr Size() const noexcept { | 33 | [[nodiscard]] GLuint Handle() const noexcept { |
| 44 | return BUFFER_SIZE; | 34 | return buffer.handle; |
| 45 | } | 35 | } |
| 46 | 36 | ||
| 47 | private: | 37 | private: |
| 48 | static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; | 38 | [[nodiscard]] static size_t Region(size_t offset) noexcept { |
| 49 | 39 | return offset / REGION_SIZE; | |
| 50 | StateTracker& state_tracker; | 40 | } |
| 51 | |||
| 52 | OGLBuffer gl_buffer; | ||
| 53 | 41 | ||
| 54 | GLuint64EXT gpu_address = 0; | 42 | size_t iterator = 0; |
| 55 | GLintptr buffer_pos = 0; | 43 | size_t used_iterator = 0; |
| 56 | GLsizeiptr mapped_size = 0; | 44 | size_t free_iterator = 0; |
| 57 | u8* mapped_ptr = nullptr; | 45 | u8* mapped_pointer = nullptr; |
| 46 | OGLBuffer buffer; | ||
| 47 | std::array<OGLSync, NUM_SYNCS> fences; | ||
| 58 | }; | 48 | }; |
| 59 | 49 | ||
| 60 | } // namespace OpenGL | 50 | } // namespace OpenGL |
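A hedged usage sketch of the new interface: Request() hands back a CPU-visible span plus the byte offset inside the GL buffer object, and because the mapping is created with GL_MAP_COHERENT_BIT no explicit flush is needed before the GPU reads it. The uniform-buffer target and the uploaded data are placeholders; Request() and Handle() are the members declared above.

    #include <cstddef>
    #include <cstring>

    #include <glad/glad.h>

    #include "video_core/renderer_opengl/gl_stream_buffer.h"

    // Copy `size` bytes into the stream buffer and bind them as a uniform buffer.
    // Assumes a current GL context and size smaller than the buffer's region size.
    void UploadAndBind(OpenGL::StreamBuffer& stream_buffer, GLuint binding, const void* data,
                       std::size_t size) {
        const auto [span, offset] = stream_buffer.Request(size);
        std::memcpy(span.data(), data, size); // coherent mapping: no glFlushMappedBufferRange needed
        glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
    }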
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 546cb6d00..37572ab28 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { | |||
| 398 | 398 | ||
| 399 | } // Anonymous namespace | 399 | } // Anonymous namespace |
| 400 | 400 | ||
| 401 | ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_) | ||
| 402 | : span(map, size), sync{sync_}, handle{handle_} {} | ||
| 403 | |||
| 404 | ImageBufferMap::~ImageBufferMap() { | 401 | ImageBufferMap::~ImageBufferMap() { |
| 405 | if (sync) { | 402 | if (sync) { |
| 406 | sync->Create(); | 403 | sync->Create(); |
| @@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() { | |||
| 487 | glFinish(); | 484 | glFinish(); |
| 488 | } | 485 | } |
| 489 | 486 | ||
| 490 | ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { | 487 | ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { |
| 491 | return upload_buffers.RequestMap(size, true); | 488 | return upload_buffers.RequestMap(size, true); |
| 492 | } | 489 | } |
| 493 | 490 | ||
| 494 | ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { | 491 | ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { |
| 495 | return download_buffers.RequestMap(size, false); | 492 | return download_buffers.RequestMap(size, false); |
| 496 | } | 493 | } |
| 497 | 494 | ||
| @@ -596,7 +593,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_ | |||
| 596 | bool insert_fence) { | 593 | bool insert_fence) { |
| 597 | const size_t index = RequestBuffer(requested_size); | 594 | const size_t index = RequestBuffer(requested_size); |
| 598 | OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; | 595 | OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; |
| 599 | return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); | 596 | return ImageBufferMap{ |
| 597 | .mapped_span = std::span(maps[index], requested_size), | ||
| 598 | .sync = sync, | ||
| 599 | .buffer = buffers[index].handle, | ||
| 600 | }; | ||
| 600 | } | 601 | } |
| 601 | 602 | ||
| 602 | size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { | 603 | size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { |
| @@ -711,7 +712,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, | |||
| 711 | 712 | ||
| 712 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 713 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 713 | std::span<const VideoCommon::BufferImageCopy> copies) { | 714 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 714 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); | 715 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); |
| 715 | glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); | 716 | glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); |
| 716 | 717 | ||
| 717 | glPixelStorei(GL_UNPACK_ALIGNMENT, 1); | 718 | glPixelStorei(GL_UNPACK_ALIGNMENT, 1); |
| @@ -735,7 +736,7 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | |||
| 735 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 736 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, |
| 736 | std::span<const VideoCommon::BufferCopy> copies) { | 737 | std::span<const VideoCommon::BufferCopy> copies) { |
| 737 | for (const VideoCommon::BufferCopy& copy : copies) { | 738 | for (const VideoCommon::BufferCopy& copy : copies) { |
| 738 | glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset, | 739 | glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + buffer_offset, |
| 739 | copy.dst_offset, copy.size); | 740 | copy.dst_offset, copy.size); |
| 740 | } | 741 | } |
| 741 | } | 742 | } |
| @@ -744,7 +745,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, | |||
| 744 | std::span<const VideoCommon::BufferImageCopy> copies) { | 745 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 745 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API | 746 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API |
| 746 | 747 | ||
| 747 | glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); | 748 | glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); |
| 748 | glPixelStorei(GL_PACK_ALIGNMENT, 1); | 749 | glPixelStorei(GL_PACK_ALIGNMENT, 1); |
| 749 | 750 | ||
| 750 | u32 current_row_length = std::numeric_limits<u32>::max(); | 751 | u32 current_row_length = std::numeric_limits<u32>::max(); |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 15b7c3676..60d08d6d6 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -31,23 +31,12 @@ using VideoCommon::NUM_RT; | |||
| 31 | using VideoCommon::Offset2D; | 31 | using VideoCommon::Offset2D; |
| 32 | using VideoCommon::RenderTargets; | 32 | using VideoCommon::RenderTargets; |
| 33 | 33 | ||
| 34 | class ImageBufferMap { | 34 | struct ImageBufferMap { |
| 35 | public: | ||
| 36 | explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync); | ||
| 37 | ~ImageBufferMap(); | 35 | ~ImageBufferMap(); |
| 38 | 36 | ||
| 39 | GLuint Handle() const noexcept { | 37 | std::span<u8> mapped_span; |
| 40 | return handle; | ||
| 41 | } | ||
| 42 | |||
| 43 | std::span<u8> Span() const noexcept { | ||
| 44 | return span; | ||
| 45 | } | ||
| 46 | |||
| 47 | private: | ||
| 48 | std::span<u8> span; | ||
| 49 | OGLSync* sync; | 38 | OGLSync* sync; |
| 50 | GLuint handle; | 39 | GLuint buffer; |
| 51 | }; | 40 | }; |
| 52 | 41 | ||
| 53 | struct FormatProperties { | 42 | struct FormatProperties { |
| @@ -69,9 +58,9 @@ public: | |||
| 69 | 58 | ||
| 70 | void Finish(); | 59 | void Finish(); |
| 71 | 60 | ||
| 72 | ImageBufferMap MapUploadBuffer(size_t size); | 61 | ImageBufferMap UploadStagingBuffer(size_t size); |
| 73 | 62 | ||
| 74 | ImageBufferMap MapDownloadBuffer(size_t size); | 63 | ImageBufferMap DownloadStagingBuffer(size_t size); |
| 75 | 64 | ||
| 76 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | 65 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); |
| 77 | 66 | ||
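With ImageBufferMap turned into a plain aggregate above, callers reach the staging memory and the GL buffer through the public fields instead of the removed Span()/Handle() accessors, while the destructor still creates the OGLSync so the staging slot is not recycled while the GPU may read it. A hypothetical caller-side sketch follows; runtime, image, copies, src and total_bytes are placeholder names, not code from this change.

    ImageBufferMap map = runtime.UploadStagingBuffer(total_bytes);
    std::memcpy(map.mapped_span.data(), src, total_bytes); // fill staging memory on the CPU
    image.UploadMemory(map, 0, copies);                    // internally binds map.buffer as the PBO
    // When 'map' goes out of scope, ~ImageBufferMap() creates *sync, fencing this use.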
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7eb5ab17a..8fcb86581 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -27,11 +27,14 @@ | |||
| 27 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 27 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 28 | #include "video_core/renderer_opengl/renderer_opengl.h" | 28 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 29 | #include "video_core/textures/decoders.h" | 29 | #include "video_core/textures/decoders.h" |
| 30 | #include "video_core/vulkan_common/vulkan_debug_callback.h" | ||
| 31 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 32 | #include "video_core/vulkan_common/vulkan_instance.h" | ||
| 33 | #include "video_core/vulkan_common/vulkan_library.h" | ||
| 34 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 30 | 35 | ||
| 31 | namespace OpenGL { | 36 | namespace OpenGL { |
| 32 | |||
| 33 | namespace { | 37 | namespace { |
| 34 | |||
| 35 | constexpr GLint PositionLocation = 0; | 38 | constexpr GLint PositionLocation = 0; |
| 36 | constexpr GLint TexCoordLocation = 1; | 39 | constexpr GLint TexCoordLocation = 1; |
| 37 | constexpr GLint ModelViewMatrixLocation = 0; | 40 | constexpr GLint ModelViewMatrixLocation = 0; |
| @@ -125,25 +128,98 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit | |||
| 125 | } | 128 | } |
| 126 | } | 129 | } |
| 127 | 130 | ||
| 131 | Vulkan::vk::PhysicalDevice FindPhysicalDevice(Vulkan::vk::Instance& instance) { | ||
| 132 | using namespace Vulkan; | ||
| 133 | using UUID = std::array<GLubyte, GL_UUID_SIZE_EXT>; | ||
| 134 | |||
| 135 | GLint num_device_uuids; | ||
| 136 | glGetIntegerv(GL_NUM_DEVICE_UUIDS_EXT, &num_device_uuids); | ||
| 137 | std::vector<UUID> device_uuids(num_device_uuids); | ||
| 138 | for (GLint index = 0; index < num_device_uuids; ++index) { | ||
| 139 | glGetUnsignedBytei_vEXT(GL_DEVICE_UUID_EXT, 0, device_uuids[index].data()); | ||
| 140 | } | ||
| 141 | UUID driver_uuid; | ||
| 142 | glGetUnsignedBytevEXT(GL_DRIVER_UUID_EXT, driver_uuid.data()); | ||
| 143 | |||
| 144 | for (const VkPhysicalDevice raw_physical_device : instance.EnumeratePhysicalDevices()) { | ||
| 145 | VkPhysicalDeviceIDProperties device_id_properties{}; | ||
| 146 | device_id_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; | ||
| 147 | |||
| 148 | VkPhysicalDeviceProperties2KHR properties{ | ||
| 149 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, | ||
| 150 | .pNext = &device_id_properties, | ||
| 151 | .properties{}, | ||
| 152 | }; | ||
| 153 | vk::PhysicalDevice physical_device(raw_physical_device, instance.Dispatch()); | ||
| 154 | physical_device.GetProperties2KHR(properties); | ||
| 155 | if (!std::ranges::equal(device_id_properties.driverUUID, driver_uuid)) { | ||
| 156 | continue; | ||
| 157 | } | ||
| 158 | const auto it = | ||
| 159 | std::ranges::find_if(device_uuids, [&device_id_properties, driver_uuid](UUID uuid) { | ||
| 160 | return std::ranges::equal(device_id_properties.deviceUUID, uuid); | ||
| 161 | }); | ||
| 162 | if (it != device_uuids.end()) { | ||
| 163 | return physical_device; | ||
| 164 | } | ||
| 165 | } | ||
| 166 | throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); | ||
| 167 | } | ||
| 128 | } // Anonymous namespace | 168 | } // Anonymous namespace |
| 129 | 169 | ||
| 170 | struct VulkanObjects { | ||
| 171 | static std::unique_ptr<VulkanObjects> TryCreate() { | ||
| 172 | if (!GLAD_GL_EXT_memory_object) { | ||
| 173 | // Interop is not present | ||
| 174 | return nullptr; | ||
| 175 | } | ||
| 176 | const std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))}; | ||
| 177 | if (vendor == "ATI Technologies Inc.") { | ||
| 178 | // Avoid using GL_EXT_memory_object on AMD, as it makes the GL driver crash | ||
| 179 | return nullptr; | ||
| 180 | } | ||
| 181 | if (!Settings::values.use_assembly_shaders.GetValue()) { | ||
| 182 | // We only need interop when assembly shaders are enabled | ||
| 183 | return nullptr; | ||
| 184 | } | ||
| 185 | #ifdef __linux__ | ||
| 186 | LOG_WARNING(Render_OpenGL, "Interop doesn't work on Linux at the moment"); | ||
| 187 | return nullptr; | ||
| 188 | #endif | ||
| 189 | try { | ||
| 190 | return std::make_unique<VulkanObjects>(); | ||
| 191 | } catch (const Vulkan::vk::Exception& exception) { | ||
| 192 | LOG_ERROR(Render_OpenGL, "Failed to initialize Vulkan objects with error: {}", | ||
| 193 | exception.what()); | ||
| 194 | return nullptr; | ||
| 195 | } | ||
| 196 | } | ||
| 197 | |||
| 198 | Common::DynamicLibrary library{Vulkan::OpenLibrary()}; | ||
| 199 | Vulkan::vk::InstanceDispatch dld; | ||
| 200 | Vulkan::vk::Instance instance{Vulkan::CreateInstance(library, dld, VK_API_VERSION_1_1)}; | ||
| 201 | Vulkan::Device device{*instance, FindPhysicalDevice(instance), nullptr, dld}; | ||
| 202 | Vulkan::MemoryAllocator memory_allocator{device, true}; | ||
| 203 | }; | ||
| 204 | |||
| 130 | RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | 205 | RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, |
| 131 | Core::Frontend::EmuWindow& emu_window_, | 206 | Core::Frontend::EmuWindow& emu_window_, |
| 132 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 207 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, |
| 133 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) | 208 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) |
| 134 | : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, | 209 | : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, |
| 135 | emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device}, | 210 | emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, |
| 136 | rasterizer{emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker} { | 211 | vulkan_objects{VulkanObjects::TryCreate()}, device{vulkan_objects != nullptr}, |
| 212 | state_tracker{gpu}, program_manager{device}, | ||
| 213 | rasterizer(emu_window, gpu, cpu_memory, device, | ||
| 214 | vulkan_objects ? &vulkan_objects->device : nullptr, | ||
| 215 | vulkan_objects ? &vulkan_objects->memory_allocator : nullptr, screen_info, | ||
| 216 | program_manager, state_tracker) { | ||
| 137 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { | 217 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { |
| 138 | glEnable(GL_DEBUG_OUTPUT); | 218 | glEnable(GL_DEBUG_OUTPUT); |
| 139 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | 219 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); |
| 140 | glDebugMessageCallback(DebugHandler, nullptr); | 220 | glDebugMessageCallback(DebugHandler, nullptr); |
| 141 | } | 221 | } |
| 142 | AddTelemetryFields(); | 222 | AddTelemetryFields(); |
| 143 | |||
| 144 | if (!GLAD_GL_VERSION_4_6) { | ||
| 145 | throw std::runtime_error{"OpenGL 4.3 is not available"}; | ||
| 146 | } | ||
| 147 | InitOpenGLObjects(); | 223 | InitOpenGLObjects(); |
| 148 | } | 224 | } |
| 149 | 225 | ||
| @@ -280,6 +356,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 280 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it | 356 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it |
| 281 | if (device.HasVertexBufferUnifiedMemory()) { | 357 | if (device.HasVertexBufferUnifiedMemory()) { |
| 282 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | 358 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); |
| 359 | glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); | ||
| 283 | 360 | ||
| 284 | glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); | 361 | glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); |
| 285 | glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, | 362 | glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, |
| @@ -412,6 +489,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 412 | 489 | ||
| 413 | program_manager.BindHostPipeline(pipeline.handle); | 490 | program_manager.BindHostPipeline(pipeline.handle); |
| 414 | 491 | ||
| 492 | state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | ||
| 415 | glEnable(GL_CULL_FACE); | 493 | glEnable(GL_CULL_FACE); |
| 416 | if (screen_info.display_srgb) { | 494 | if (screen_info.display_srgb) { |
| 417 | glEnable(GL_FRAMEBUFFER_SRGB); | 495 | glEnable(GL_FRAMEBUFFER_SRGB); |
| @@ -430,7 +508,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 430 | glCullFace(GL_BACK); | 508 | glCullFace(GL_BACK); |
| 431 | glFrontFace(GL_CW); | 509 | glFrontFace(GL_CW); |
| 432 | glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); | 510 | glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); |
| 433 | glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | ||
| 434 | glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), | 511 | glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), |
| 435 | static_cast<GLfloat>(layout.height)); | 512 | static_cast<GLfloat>(layout.height)); |
| 436 | glDepthRangeIndexed(0, 0.0, 0.0); | 513 | glDepthRangeIndexed(0, 0.0, 0.0); |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index e043a0ccb..f210190dd 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -38,6 +38,8 @@ class GPU; | |||
| 38 | 38 | ||
| 39 | namespace OpenGL { | 39 | namespace OpenGL { |
| 40 | 40 | ||
| 41 | struct VulkanObjects; | ||
| 42 | |||
| 41 | /// Structure used for storing information about the textures for the Switch screen | 43 | /// Structure used for storing information about the textures for the Switch screen |
| 42 | struct TextureInfo { | 44 | struct TextureInfo { |
| 43 | OGLTexture resource; | 45 | OGLTexture resource; |
| @@ -99,8 +101,11 @@ private: | |||
| 99 | Core::Memory::Memory& cpu_memory; | 101 | Core::Memory::Memory& cpu_memory; |
| 100 | Tegra::GPU& gpu; | 102 | Tegra::GPU& gpu; |
| 101 | 103 | ||
| 102 | const Device device; | 104 | std::unique_ptr<VulkanObjects> vulkan_objects; |
| 103 | StateTracker state_tracker{gpu}; | 105 | Device device; |
| 106 | StateTracker state_tracker; | ||
| 107 | ProgramManager program_manager; | ||
| 108 | RasterizerOpenGL rasterizer; | ||
| 104 | 109 | ||
| 105 | // OpenGL object IDs | 110 | // OpenGL object IDs |
| 106 | OGLSampler present_sampler; | 111 | OGLSampler present_sampler; |
| @@ -116,11 +121,6 @@ private: | |||
| 116 | /// Display information for Switch screen | 121 | /// Display information for Switch screen |
| 117 | ScreenInfo screen_info; | 122 | ScreenInfo screen_info; |
| 118 | 123 | ||
| 119 | /// Global dummy shader pipeline | ||
| 120 | ProgramManager program_manager; | ||
| 121 | |||
| 122 | RasterizerOpenGL rasterizer; | ||
| 123 | |||
| 124 | /// OpenGL framebuffer data | 124 | /// OpenGL framebuffer data |
| 125 | std::vector<u8> gl_framebuffer_data; | 125 | std::vector<u8> gl_framebuffer_data; |
| 126 | 126 | ||
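The member reordering in renderer_opengl.h above matters because C++ constructs non-static data members in declaration order (and destroys them in reverse), regardless of the order written in a constructor's initializer list. Declaring vulkan_objects, device and state_tracker before program_manager and rasterizer guarantees they already exist when the updated constructor in renderer_opengl.cpp hands them to those members. A minimal generic illustration, not code from this change:

    struct Example {
        int base;
        int dependent;

        // Initialization follows the declaration order above, not the list below:
        // 'base' becomes 1 first, then 'dependent' becomes 2. GCC and Clang emit
        // -Wreorder here precisely because the written order is misleading.
        Example() : dependent{base + 1}, base{1} {}
    };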
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index eb849cbf2..aeb36551c 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -71,7 +71,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s | |||
| 71 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 71 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| 72 | 72 | ||
| 73 | program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); | 73 | program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); |
| 74 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | 74 | glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); |
| 75 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | 75 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); |
| 76 | 76 | ||
| 77 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | 77 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); |
| @@ -91,8 +91,8 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s | |||
| 91 | glUniform1ui(5, params.x_shift); | 91 | glUniform1ui(5, params.x_shift); |
| 92 | glUniform1ui(6, params.block_height); | 92 | glUniform1ui(6, params.block_height); |
| 93 | glUniform1ui(7, params.block_height_mask); | 93 | glUniform1ui(7, params.block_height_mask); |
| 94 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | 94 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, |
| 95 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | 95 | image.guest_size_bytes - swizzle.buffer_offset); |
| 96 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | 96 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, |
| 97 | GL_WRITE_ONLY, store_format); | 97 | GL_WRITE_ONLY, store_format); |
| 98 | glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); | 98 | glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); |
| @@ -108,7 +108,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s | |||
| 108 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; | 108 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; |
| 109 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 109 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| 110 | 110 | ||
| 111 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | 111 | glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); |
| 112 | program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); | 112 | program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); |
| 113 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | 113 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); |
| 114 | 114 | ||
| @@ -132,8 +132,8 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s | |||
| 132 | glUniform1ui(7, params.block_height_mask); | 132 | glUniform1ui(7, params.block_height_mask); |
| 133 | glUniform1ui(8, params.block_depth); | 133 | glUniform1ui(8, params.block_depth); |
| 134 | glUniform1ui(9, params.block_depth_mask); | 134 | glUniform1ui(9, params.block_depth_mask); |
| 135 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | 135 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, |
| 136 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | 136 | image.guest_size_bytes - swizzle.buffer_offset); |
| 137 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | 137 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, |
| 138 | GL_WRITE_ONLY, store_format); | 138 | GL_WRITE_ONLY, store_format); |
| 139 | glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); | 139 | glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); |
| @@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu | |||
| 159 | "Non-power of two images are not implemented"); | 159 | "Non-power of two images are not implemented"); |
| 160 | 160 | ||
| 161 | program_manager.BindHostCompute(pitch_unswizzle_program.handle); | 161 | program_manager.BindHostCompute(pitch_unswizzle_program.handle); |
| 162 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | 162 | glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); |
| 163 | glUniform2ui(LOC_ORIGIN, 0, 0); | 163 | glUniform2ui(LOC_ORIGIN, 0, 0); |
| 164 | glUniform2i(LOC_DESTINATION, 0, 0); | 164 | glUniform2i(LOC_DESTINATION, 0, 0); |
| 165 | glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); | 165 | glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); |
| @@ -172,8 +172,8 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu | |||
| 172 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | 172 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); |
| 173 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | 173 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); |
| 174 | 174 | ||
| 175 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | 175 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, |
| 176 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | 176 | image.guest_size_bytes - swizzle.buffer_offset); |
| 177 | glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); | 177 | glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); |
| 178 | } | 178 | } |
| 179 | program_manager.RestoreGuestCompute(); | 179 | program_manager.RestoreGuestCompute(); |
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 359997255..bec026bc3 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h | |||
| @@ -15,9 +15,10 @@ | |||
| 15 | namespace OpenGL { | 15 | namespace OpenGL { |
| 16 | 16 | ||
| 17 | class Image; | 17 | class Image; |
| 18 | class ImageBufferMap; | ||
| 19 | class ProgramManager; | 18 | class ProgramManager; |
| 20 | 19 | ||
| 20 | struct ImageBufferMap; | ||
| 21 | |||
| 21 | class UtilShaders { | 22 | class UtilShaders { |
| 22 | public: | 23 | public: |
| 23 | explicit UtilShaders(ProgramManager& program_manager); | 24 | explicit UtilShaders(ProgramManager& program_manager); |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 85121d9fd..19aaf034f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { | |||
| 531 | return {}; | 531 | return {}; |
| 532 | } | 532 | } |
| 533 | 533 | ||
| 534 | VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { | 534 | VkIndexType IndexFormat(Maxwell::IndexFormat index_format) { |
| 535 | switch (index_format) { | 535 | switch (index_format) { |
| 536 | case Maxwell::IndexFormat::UnsignedByte: | 536 | case Maxwell::IndexFormat::UnsignedByte: |
| 537 | if (!device.IsExtIndexTypeUint8Supported()) { | ||
| 538 | UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device"); | ||
| 539 | return VK_INDEX_TYPE_UINT16; | ||
| 540 | } | ||
| 541 | return VK_INDEX_TYPE_UINT8_EXT; | 537 | return VK_INDEX_TYPE_UINT8_EXT; |
| 542 | case Maxwell::IndexFormat::UnsignedShort: | 538 | case Maxwell::IndexFormat::UnsignedShort: |
| 543 | return VK_INDEX_TYPE_UINT16; | 539 | return VK_INDEX_TYPE_UINT16; |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 7c34b47dc..e3e06ba38 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib | |||
| 53 | 53 | ||
| 54 | VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); | 54 | VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); |
| 55 | 55 | ||
| 56 | VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); | 56 | VkIndexType IndexFormat(Maxwell::IndexFormat index_format); |
| 57 | 57 | ||
| 58 | VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); | 58 | VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); |
| 59 | 59 | ||
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6909576cb..1cc720ddd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -107,7 +107,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | |||
| 107 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), | 107 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), |
| 108 | surface(CreateSurface(instance, render_window)), | 108 | surface(CreateSurface(instance, render_window)), |
| 109 | device(CreateDevice(instance, dld, *surface)), | 109 | device(CreateDevice(instance, dld, *surface)), |
| 110 | memory_allocator(device), | 110 | memory_allocator(device, false), |
| 111 | state_tracker(gpu), | 111 | state_tracker(gpu), |
| 112 | scheduler(device, state_tracker), | 112 | scheduler(device, state_tracker), |
| 113 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, | 113 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 1efaf3b77..72071316c 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -58,12 +58,11 @@ private: | |||
| 58 | vk::InstanceDispatch dld; | 58 | vk::InstanceDispatch dld; |
| 59 | 59 | ||
| 60 | vk::Instance instance; | 60 | vk::Instance instance; |
| 61 | 61 | vk::DebugUtilsMessenger debug_callback; | |
| 62 | vk::SurfaceKHR surface; | 62 | vk::SurfaceKHR surface; |
| 63 | 63 | ||
| 64 | VKScreenInfo screen_info; | 64 | VKScreenInfo screen_info; |
| 65 | 65 | ||
| 66 | vk::DebugUtilsMessenger debug_callback; | ||
| 67 | Device device; | 66 | Device device; |
| 68 | MemoryAllocator memory_allocator; | 67 | MemoryAllocator memory_allocator; |
| 69 | StateTracker state_tracker; | 68 | StateTracker state_tracker; |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index df8992528..a1a32aabe 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -148,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 148 | SetUniformData(data, framebuffer); | 148 | SetUniformData(data, framebuffer); |
| 149 | SetVertexData(data, framebuffer); | 149 | SetVertexData(data, framebuffer); |
| 150 | 150 | ||
| 151 | const std::span<u8> map = buffer_commit.Map(); | 151 | const std::span<u8> mapped_span = buffer_commit.Map(); |
| 152 | std::memcpy(map.data(), &data, sizeof(data)); | 152 | std::memcpy(mapped_span.data(), &data, sizeof(data)); |
| 153 | 153 | ||
| 154 | if (!use_accelerated) { | 154 | if (!use_accelerated) { |
| 155 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); | 155 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); |
| @@ -162,8 +162,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 162 | constexpr u32 block_height_log2 = 4; | 162 | constexpr u32 block_height_log2 = 4; |
| 163 | const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); | 163 | const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); |
| 164 | Tegra::Texture::UnswizzleTexture( | 164 | Tegra::Texture::UnswizzleTexture( |
| 165 | map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, | 165 | mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), |
| 166 | framebuffer.width, framebuffer.height, 1, block_height_log2, 0); | 166 | bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); |
| 167 | 167 | ||
| 168 | const VkBufferImageCopy copy{ | 168 | const VkBufferImageCopy copy{ |
| 169 | .bufferOffset = image_offset, | 169 | .bufferOffset = image_offset, |
| @@ -263,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 263 | cmdbuf.Draw(4, 1, 0, 0); | 263 | cmdbuf.Draw(4, 1, 0, 0); |
| 264 | cmdbuf.EndRenderPass(); | 264 | cmdbuf.EndRenderPass(); |
| 265 | }); | 265 | }); |
| 266 | |||
| 267 | return *semaphores[image_index]; | 266 | return *semaphores[image_index]; |
| 268 | } | 267 | } |
| 269 | 268 | ||
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index d8ad40a0f..48fc5d966 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -3,188 +3,276 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | ||
| 6 | #include <cstring> | 7 | #include <cstring> |
| 7 | #include <memory> | 8 | #include <span> |
| 9 | #include <vector> | ||
| 8 | 10 | ||
| 9 | #include "core/core.h" | ||
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 11 | #include "video_core/buffer_cache/buffer_cache.h" |
| 12 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 13 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 13 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 15 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 16 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 14 | #include "video_core/vulkan_common/vulkan_device.h" | 17 | #include "video_core/vulkan_common/vulkan_device.h" |
| 18 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 16 | 20 | ||
| 17 | namespace Vulkan { | 21 | namespace Vulkan { |
| 18 | |||
| 19 | namespace { | 22 | namespace { |
| 23 | VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) { | ||
| 24 | return VkBufferCopy{ | ||
| 25 | .srcOffset = copy.src_offset, | ||
| 26 | .dstOffset = copy.dst_offset, | ||
| 27 | .size = copy.size, | ||
| 28 | }; | ||
| 29 | } | ||
| 20 | 30 | ||
| 21 | constexpr VkBufferUsageFlags BUFFER_USAGE = | 31 | VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) { |
| 22 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | 32 | if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) { |
| 23 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; | 33 | return VK_INDEX_TYPE_UINT8_EXT; |
| 24 | 34 | } | |
| 25 | constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = | 35 | if (num_elements <= 0xffff) { |
| 26 | VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | | 36 | return VK_INDEX_TYPE_UINT16; |
| 27 | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | | 37 | } |
| 28 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | 38 | return VK_INDEX_TYPE_UINT32; |
| 29 | 39 | } | |
| 30 | constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = | ||
| 31 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | | ||
| 32 | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; | ||
| 33 | 40 | ||
| 34 | constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = | 41 | size_t BytesPerIndex(VkIndexType index_type) { |
| 35 | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; | 42 | switch (index_type) { |
| 43 | case VK_INDEX_TYPE_UINT8_EXT: | ||
| 44 | return 1; | ||
| 45 | case VK_INDEX_TYPE_UINT16: | ||
| 46 | return 2; | ||
| 47 | case VK_INDEX_TYPE_UINT32: | ||
| 48 | return 4; | ||
| 49 | default: | ||
| 50 | UNREACHABLE_MSG("Invalid index type={}", index_type); | ||
| 51 | return 1; | ||
| 52 | } | ||
| 53 | } | ||
| 36 | 54 | ||
| 55 | template <typename T> | ||
| 56 | std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { | ||
| 57 | std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; | ||
| 58 | std::ranges::transform(indices, indices.begin(), | ||
| 59 | [quad, first](u32 index) { return first + index + quad * 4; }); | ||
| 60 | return indices; | ||
| 61 | } | ||
| 37 | } // Anonymous namespace | 62 | } // Anonymous namespace |
| 38 | 63 | ||
| 39 | Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_, | 64 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) |
| 40 | StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) | 65 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} |
| 41 | : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ | 66 | |
| 42 | staging_pool_} { | 67 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, |
| 43 | buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | 68 | VAddr cpu_addr_, u64 size_bytes_) |
| 69 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | ||
| 70 | buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 44 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 71 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 45 | .pNext = nullptr, | 72 | .pNext = nullptr, |
| 46 | .flags = 0, | 73 | .flags = 0, |
| 47 | .size = static_cast<VkDeviceSize>(size_), | 74 | .size = SizeBytes(), |
| 48 | .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | 75 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 76 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | ||
| 77 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||
| 78 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | ||
| 79 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, | ||
| 49 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 80 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 50 | .queueFamilyIndexCount = 0, | 81 | .queueFamilyIndexCount = 0, |
| 51 | .pQueueFamilyIndices = nullptr, | 82 | .pQueueFamilyIndices = nullptr, |
| 52 | }); | 83 | }); |
| 53 | commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | 84 | if (runtime.device.HasDebuggingToolAttached()) { |
| 85 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | ||
| 86 | } | ||
| 87 | commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||
| 54 | } | 88 | } |
| 55 | 89 | ||
| 56 | Buffer::~Buffer() = default; | 90 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, |
| 91 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, | ||
| 92 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 93 | VKDescriptorPool& descriptor_pool) | ||
| 94 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||
| 95 | staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, | ||
| 96 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 97 | quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {} | ||
| 57 | 98 | ||
| 58 | void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { | 99 | StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { |
| 59 | const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload); | 100 | return staging_pool.Request(size, MemoryUsage::Upload); |
| 60 | std::memcpy(staging.mapped_span.data(), data, data_size); | 101 | } |
| 61 | 102 | ||
| 62 | scheduler.RequestOutsideRenderPassOperationContext(); | 103 | StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { |
| 104 | return staging_pool.Request(size, MemoryUsage::Download); | ||
| 105 | } | ||
| 63 | 106 | ||
| 64 | const VkBuffer handle = Handle(); | 107 | void BufferCacheRuntime::Finish() { |
| 65 | scheduler.Record([staging = staging.buffer, handle, offset, data_size, | 108 | scheduler.Finish(); |
| 66 | &device = device](vk::CommandBuffer cmdbuf) { | 109 | } |
| 67 | const VkBufferMemoryBarrier read_barrier{ | 110 | |
| 68 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | 111 | void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, |
| 69 | .pNext = nullptr, | 112 | std::span<const VideoCommon::BufferCopy> copies) { |
| 70 | .srcAccessMask = | 113 | static constexpr VkMemoryBarrier READ_BARRIER{ |
| 71 | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | | 114 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| 72 | VK_ACCESS_HOST_WRITE_BIT | | 115 | .pNext = nullptr, |
| 73 | (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), | 116 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, |
| 74 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | 117 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 75 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 118 | }; |
| 76 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 119 | static constexpr VkMemoryBarrier WRITE_BARRIER{ |
| 77 | .buffer = handle, | 120 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| 78 | .offset = offset, | 121 | .pNext = nullptr, |
| 79 | .size = data_size, | 122 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 80 | }; | 123 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, |
| 81 | const VkBufferMemoryBarrier write_barrier{ | 124 | }; |
| 82 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | 125 | // Measuring a popular game, this number never exceeds the specified size once data is warmed up |
| 83 | .pNext = nullptr, | 126 | boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); |
| 84 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 127 | std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); |
| 85 | .dstAccessMask = UPLOAD_ACCESS_BARRIERS, | 128 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 86 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 129 | scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { |
| 87 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 88 | .buffer = handle, | ||
| 89 | .offset = offset, | ||
| 90 | .size = data_size, | ||
| 91 | }; | ||
| 92 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | 130 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 93 | 0, read_barrier); | 131 | 0, READ_BARRIER); |
| 94 | cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); | 132 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); |
| 95 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, | 133 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, |
| 96 | write_barrier); | 134 | 0, WRITE_BARRIER); |
| 97 | }); | 135 | }); |
| 98 | } | 136 | } |
| 99 | 137 | ||
| 100 | void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { | 138 | void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, |
| 101 | auto staging = staging_pool.Request(data_size, MemoryUsage::Download); | 139 | u32 num_indices, u32 base_vertex, VkBuffer buffer, |
| 102 | scheduler.RequestOutsideRenderPassOperationContext(); | 140 | u32 offset, [[maybe_unused]] u32 size) { |
| 141 | VkIndexType index_type = MaxwellToVK::IndexFormat(index_format); | ||
| 142 | if (topology == PrimitiveTopology::Quads) { | ||
| 143 | index_type = VK_INDEX_TYPE_UINT32; | ||
| 144 | std::tie(buffer, offset) = | ||
| 145 | quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); | ||
| 146 | } else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { | ||
| 147 | index_type = VK_INDEX_TYPE_UINT16; | ||
| 148 | std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset); | ||
| 149 | } | ||
| 150 | scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) { | ||
| 151 | cmdbuf.BindIndexBuffer(buffer, offset, index_type); | ||
| 152 | }); | ||
| 153 | } | ||
| 103 | 154 | ||
| 104 | const VkBuffer handle = Handle(); | 155 | void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) { |
| 105 | scheduler.Record( | 156 | ReserveQuadArrayLUT(first + count, true); |
| 106 | [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) { | ||
| 107 | const VkBufferMemoryBarrier barrier{ | ||
| 108 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||
| 109 | .pNext = nullptr, | ||
| 110 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 111 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 112 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 113 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 114 | .buffer = handle, | ||
| 115 | .offset = offset, | ||
| 116 | .size = data_size, | ||
| 117 | }; | ||
| 118 | |||
| 119 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | | ||
| 120 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | | ||
| 121 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 122 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); | ||
| 123 | cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size}); | ||
| 124 | }); | ||
| 125 | scheduler.Finish(); | ||
| 126 | 157 | ||
| 127 | std::memcpy(data, staging.mapped_span.data(), data_size); | 158 | // The LUT has the indices 0, 1, 2, and 3 copied as an array |
| 159 | // To apply these 'first' offsets we can apply an offset based on the modulus. | ||
| 160 | const VkIndexType index_type = quad_array_lut_index_type; | ||
| 161 | const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4); | ||
| 162 | const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type); | ||
| 163 | scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) { | ||
| 164 | cmdbuf.BindIndexBuffer(buffer, offset, index_type); | ||
| 165 | }); | ||
| 128 | } | 166 | } |
| 129 | 167 | ||
| 130 | void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | 168 | void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, |
| 131 | std::size_t copy_size) { | 169 | u32 stride) { |
| 132 | scheduler.RequestOutsideRenderPassOperationContext(); | 170 | if (device.IsExtExtendedDynamicStateSupported()) { |
| 171 | scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) { | ||
| 172 | const VkDeviceSize vk_offset = offset; | ||
| 173 | const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE; | ||
| 174 | const VkDeviceSize vk_stride = stride; | ||
| 175 | cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); | ||
| 176 | }); | ||
| 177 | } else { | ||
| 178 | scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { | ||
| 179 | cmdbuf.BindVertexBuffer(index, buffer, offset); | ||
| 180 | }); | ||
| 181 | } | ||
| 182 | } | ||
| 133 | 183 | ||
| 134 | const VkBuffer dst_buffer = Handle(); | 184 | void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, |
| 135 | scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, | 185 | u32 size) { |
| 136 | copy_size](vk::CommandBuffer cmdbuf) { | 186 | if (!device.IsExtTransformFeedbackSupported()) { |
| 137 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size}); | 187 | // Already logged in the rasterizer |
| 138 | 188 | return; | |
| 139 | std::array<VkBufferMemoryBarrier, 2> barriers; | 189 | } |
| 140 | barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | 190 | scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) { |
| 141 | barriers[0].pNext = nullptr; | 191 | const VkDeviceSize vk_offset = offset; |
| 142 | barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; | 192 | const VkDeviceSize vk_size = size; |
| 143 | barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | 193 | cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size); |
| 144 | barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 145 | barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 146 | barriers[0].buffer = src_buffer; | ||
| 147 | barriers[0].offset = src_offset; | ||
| 148 | barriers[0].size = copy_size; | ||
| 149 | barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 150 | barriers[1].pNext = nullptr; | ||
| 151 | barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | ||
| 152 | barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS; | ||
| 153 | barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 154 | barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 155 | barriers[1].buffer = dst_buffer; | ||
| 156 | barriers[1].offset = dst_offset; | ||
| 157 | barriers[1].size = copy_size; | ||
| 158 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, | ||
| 159 | barriers, {}); | ||
| 160 | }); | 194 | }); |
| 161 | } | 195 | } |
| 162 | 196 | ||
| 163 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 197 | void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) { |
| 164 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 198 | update_descriptor_queue.AddBuffer(buffer, offset, size); |
| 165 | const Device& device_, MemoryAllocator& memory_allocator_, | ||
| 166 | VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, | ||
| 167 | StagingBufferPool& staging_pool_) | ||
| 168 | : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_, | ||
| 169 | cpu_memory_, stream_buffer_}, | ||
| 170 | device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||
| 171 | staging_pool{staging_pool_} {} | ||
| 172 | |||
| 173 | VKBufferCache::~VKBufferCache() = default; | ||
| 174 | |||
| 175 | std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | ||
| 176 | return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr, | ||
| 177 | size); | ||
| 178 | } | 199 | } |
| 179 | 200 | ||
| 180 | VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { | 201 | void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) { |
| 181 | size = std::max(size, std::size_t(4)); | 202 | if (num_indices <= current_num_indices) { |
| 182 | const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal); | 203 | return; |
| 204 | } | ||
| 205 | if (wait_for_idle) { | ||
| 206 | scheduler.Finish(); | ||
| 207 | } | ||
| 208 | current_num_indices = num_indices; | ||
| 209 | quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices); | ||
| 210 | |||
| 211 | const u32 num_quads = num_indices / 4; | ||
| 212 | const u32 num_triangle_indices = num_quads * 6; | ||
| 213 | const u32 num_first_offset_copies = 4; | ||
| 214 | const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type); | ||
| 215 | const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; | ||
| 216 | quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 217 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 218 | .pNext = nullptr, | ||
| 219 | .flags = 0, | ||
| 220 | .size = size_bytes, | ||
| 221 | .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 222 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 223 | .queueFamilyIndexCount = 0, | ||
| 224 | .pQueueFamilyIndices = nullptr, | ||
| 225 | }); | ||
| 226 | if (device.HasDebuggingToolAttached()) { | ||
| 227 | quad_array_lut.SetObjectNameEXT("Quad LUT"); | ||
| 228 | } | ||
| 229 | quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal); | ||
| 230 | |||
| 231 | const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); | ||
| 232 | u8* staging_data = staging.mapped_span.data(); | ||
| 233 | const size_t quad_size = bytes_per_index * 6; | ||
| 234 | for (u32 first = 0; first < num_first_offset_copies; ++first) { | ||
| 235 | for (u32 quad = 0; quad < num_quads; ++quad) { | ||
| 236 | switch (quad_array_lut_index_type) { | ||
| 237 | case VK_INDEX_TYPE_UINT8_EXT: | ||
| 238 | std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size); | ||
| 239 | break; | ||
| 240 | case VK_INDEX_TYPE_UINT16: | ||
| 241 | std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size); | ||
| 242 | break; | ||
| 243 | case VK_INDEX_TYPE_UINT32: | ||
| 244 | std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size); | ||
| 245 | break; | ||
| 246 | default: | ||
| 247 | UNREACHABLE(); | ||
| 248 | break; | ||
| 249 | } | ||
| 250 | staging_data += quad_size; | ||
| 251 | } | ||
| 252 | } | ||
| 183 | scheduler.RequestOutsideRenderPassOperationContext(); | 253 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 184 | scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) { | 254 | scheduler.Record([src_buffer = staging.buffer, dst_buffer = *quad_array_lut, |
| 185 | cmdbuf.FillBuffer(buffer, 0, size, 0); | 255 | size_bytes](vk::CommandBuffer cmdbuf) { |
| 256 | const VkBufferCopy copy{ | ||
| 257 | .srcOffset = 0, | ||
| 258 | .dstOffset = 0, | ||
| 259 | .size = size_bytes, | ||
| 260 | }; | ||
| 261 | const VkBufferMemoryBarrier write_barrier{ | ||
| 262 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||
| 263 | .pNext = nullptr, | ||
| 264 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 265 | .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, | ||
| 266 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 267 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 268 | .buffer = dst_buffer, | ||
| 269 | .offset = 0, | ||
| 270 | .size = size_bytes, | ||
| 271 | }; | ||
| 272 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); | ||
| 273 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, | ||
| 274 | 0, write_barrier); | ||
| 186 | }); | 275 | }); |
| 187 | return {empty.buffer, 0, 0}; | ||
| 188 | } | 276 | } |
| 189 | 277 | ||
| 190 | } // namespace Vulkan | 278 | } // namespace Vulkan |
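On the quad index LUT built in ReserveQuadArrayLUT above: MakeQuadIndices expands quad q with base offset 'first' into the triangle-list pattern {0, 1, 2, 0, 2, 3} shifted by first + 4q, and the buffer stores num_first_offset_copies (4) full expansions so that BindQuadArrayIndexBuffer can pick the copy matching first % 4 through its sub_first_offset term. A small standalone check of that expansion, illustrative only and not code from this commit:

    #include <array>
    #include <cstdint>
    #include <cstdio>

    // Same expansion as MakeQuadIndices<u32>: one quad becomes two triangles.
    std::array<std::uint32_t, 6> QuadIndices(std::uint32_t quad, std::uint32_t first) {
        std::array<std::uint32_t, 6> indices{0, 1, 2, 0, 2, 3};
        for (std::uint32_t& index : indices) {
            index += first + quad * 4;
        }
        return indices;
    }

    int main() {
        // first = 0: quads 0 and 1 print 0 1 2 0 2 3 4 5 6 4 6 7
        // first = 2: quad 0 would give 2 3 4 2 4 5, served by the copy for first % 4 == 2
        for (std::uint32_t quad = 0; quad < 2; ++quad) {
            for (const std::uint32_t index : QuadIndices(quad, 0)) {
                std::printf("%u ", index);
            }
        }
        std::printf("\n");
        return 0;
    }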
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 41d577510..d232e1f2d 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -4,69 +4,112 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 7 | #include "video_core/buffer_cache/buffer_cache.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 12 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 11 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 13 | ||
| 16 | namespace Vulkan { | 14 | namespace Vulkan { |
| 17 | 15 | ||
| 18 | class Device; | 16 | class Device; |
| 17 | class VKDescriptorPool; | ||
| 19 | class VKScheduler; | 18 | class VKScheduler; |
| 19 | class VKUpdateDescriptorQueue; | ||
| 20 | 20 | ||
| 21 | class Buffer final : public VideoCommon::BufferBlock { | 21 | class BufferCacheRuntime; |
| 22 | public: | ||
| 23 | explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler, | ||
| 24 | StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); | ||
| 25 | ~Buffer(); | ||
| 26 | |||
| 27 | void Upload(std::size_t offset, std::size_t data_size, const u8* data); | ||
| 28 | |||
| 29 | void Download(std::size_t offset, std::size_t data_size, u8* data); | ||
| 30 | 22 | ||
| 31 | void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | 23 | class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { |
| 32 | std::size_t copy_size); | 24 | public: |
| 25 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); | ||
| 26 | explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | ||
| 27 | VAddr cpu_addr_, u64 size_bytes_); | ||
| 33 | 28 | ||
| 34 | VkBuffer Handle() const { | 29 | [[nodiscard]] VkBuffer Handle() const noexcept { |
| 35 | return *buffer; | 30 | return *buffer; |
| 36 | } | 31 | } |
| 37 | 32 | ||
| 38 | u64 Address() const { | 33 | operator VkBuffer() const noexcept { |
| 39 | return 0; | 34 | return *buffer; |
| 40 | } | 35 | } |
| 41 | 36 | ||
| 42 | private: | 37 | private: |
| 43 | const Device& device; | ||
| 44 | VKScheduler& scheduler; | ||
| 45 | StagingBufferPool& staging_pool; | ||
| 46 | |||
| 47 | vk::Buffer buffer; | 38 | vk::Buffer buffer; |
| 48 | MemoryCommit commit; | 39 | MemoryCommit commit; |
| 49 | }; | 40 | }; |
| 50 | 41 | ||
| 51 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { | 42 | class BufferCacheRuntime { |
| 43 | friend Buffer; | ||
| 44 | |||
| 45 | using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology; | ||
| 46 | using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat; | ||
| 47 | |||
| 52 | public: | 48 | public: |
| 53 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, | 49 | explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, |
| 54 | Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, | 50 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, |
| 55 | const Device& device, MemoryAllocator& memory_allocator, | 51 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 56 | VKScheduler& scheduler, VKStreamBuffer& stream_buffer, | 52 | VKDescriptorPool& descriptor_pool); |
| 57 | StagingBufferPool& staging_pool); | 53 | |
| 58 | ~VKBufferCache(); | 54 | void Finish(); |
| 55 | |||
| 56 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | ||
| 57 | |||
| 58 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | ||
| 59 | 59 | ||
| 60 | BufferInfo GetEmptyBuffer(std::size_t size) override; | 60 | void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, |
| 61 | std::span<const VideoCommon::BufferCopy> copies); | ||
| 61 | 62 | ||
| 62 | protected: | 63 | void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices, |
| 63 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | 64 | u32 base_vertex, VkBuffer buffer, u32 offset, u32 size); |
| 65 | |||
| 66 | void BindQuadArrayIndexBuffer(u32 first, u32 count); | ||
| 67 | |||
| 68 | void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); | ||
| 69 | |||
| 70 | void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); | ||
| 71 | |||
| 72 | void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) { | ||
| 73 | BindBuffer(buffer, offset, size); | ||
| 74 | } | ||
| 75 | |||
| 76 | void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size, | ||
| 77 | [[maybe_unused]] bool is_written) { | ||
| 78 | BindBuffer(buffer, offset, size); | ||
| 79 | } | ||
| 64 | 80 | ||
| 65 | private: | 81 | private: |
| 82 | void BindBuffer(VkBuffer buffer, u32 offset, u32 size); | ||
| 83 | |||
| 84 | void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle); | ||
| 85 | |||
| 66 | const Device& device; | 86 | const Device& device; |
| 67 | MemoryAllocator& memory_allocator; | 87 | MemoryAllocator& memory_allocator; |
| 68 | VKScheduler& scheduler; | 88 | VKScheduler& scheduler; |
| 69 | StagingBufferPool& staging_pool; | 89 | StagingBufferPool& staging_pool; |
| 90 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 91 | |||
| 92 | vk::Buffer quad_array_lut; | ||
| 93 | MemoryCommit quad_array_lut_commit; | ||
| 94 | VkIndexType quad_array_lut_index_type{}; | ||
| 95 | u32 current_num_indices = 0; | ||
| 96 | |||
| 97 | Uint8Pass uint8_pass; | ||
| 98 | QuadIndexedPass quad_index_pass; | ||
| 70 | }; | 99 | }; |
| 71 | 100 | ||
| 101 | struct BufferCacheParams { | ||
| 102 | using Runtime = Vulkan::BufferCacheRuntime; | ||
| 103 | using Buffer = Vulkan::Buffer; | ||
| 104 | |||
| 105 | static constexpr bool IS_OPENGL = false; | ||
| 106 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; | ||
| 107 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; | ||
| 108 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; | ||
| 109 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; | ||
| 110 | static constexpr bool USE_MEMORY_MAPS = true; | ||
| 111 | }; | ||
| 112 | |||
| 113 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | ||
| 114 | |||
| 72 | } // namespace Vulkan | 115 | } // namespace Vulkan |
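BufferCacheParams above is the traits struct that instantiates the shared VideoCommon::BufferCache template for the Vulkan backend: it selects staging-map uploads (USE_MEMORY_MAPS) and opts out of the OpenGL-only paths. The following is a rough sketch of how such a traits struct is typically consumed by a templated cache; the real buffer_cache.h added by this patch is considerably more involved, and the class and method names below are illustrative only.

    #include <cstddef>

    // Much-simplified stand-in for a traits-parameterized buffer cache; illustrative only.
    template <class P>
    class GenericBufferCache {
        using Runtime = typename P::Runtime;
        using Buffer = typename P::Buffer;

    public:
        explicit GenericBufferCache(Runtime& runtime_) : runtime{runtime_} {}

        void UploadRegion(Buffer& dest, std::size_t offset, std::size_t size) {
            if constexpr (P::USE_MEMORY_MAPS) {
                // Vulkan-style path: fill a mapped staging buffer, then have the runtime
                // record a GPU copy of 'size' bytes into 'dest' at 'offset'.
                [[maybe_unused]] const auto staging = runtime.UploadStagingBuffer(size);
            } else {
                // Immediate-upload path for backends without persistent memory maps.
            }
        }

    private:
        Runtime& runtime;
    };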
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 5eb6a54be..a4fdcdf81 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include "common/alignment.h" | 10 | #include "common/alignment.h" |
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" | ||
| 14 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | 13 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" |
| 15 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | 14 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" |
| 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 15 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -22,30 +21,7 @@ | |||
| 22 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 21 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 23 | 22 | ||
| 24 | namespace Vulkan { | 23 | namespace Vulkan { |
| 25 | |||
| 26 | namespace { | 24 | namespace { |
| 27 | |||
| 28 | VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { | ||
| 29 | return { | ||
| 30 | .binding = 0, | ||
| 31 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 32 | .descriptorCount = 1, | ||
| 33 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 34 | .pImmutableSamplers = nullptr, | ||
| 35 | }; | ||
| 36 | } | ||
| 37 | |||
| 38 | VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { | ||
| 39 | return { | ||
| 40 | .dstBinding = 0, | ||
| 41 | .dstArrayElement = 0, | ||
| 42 | .descriptorCount = 1, | ||
| 43 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 44 | .offset = 0, | ||
| 45 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 46 | }; | ||
| 47 | } | ||
| 48 | |||
| 49 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | 25 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { |
| 50 | return { | 26 | return { |
| 51 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 27 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| @@ -162,55 +138,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( | |||
| 162 | return set; | 138 | return set; |
| 163 | } | 139 | } |
| 164 | 140 | ||
| 165 | QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_, | ||
| 166 | VKDescriptorPool& descriptor_pool_, | ||
| 167 | StagingBufferPool& staging_buffer_pool_, | ||
| 168 | VKUpdateDescriptorQueue& update_descriptor_queue_) | ||
| 169 | : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), | ||
| 170 | BuildQuadArrayPassDescriptorUpdateTemplateEntry(), | ||
| 171 | BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV), | ||
| 172 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | ||
| 173 | update_descriptor_queue{update_descriptor_queue_} {} | ||
| 174 | |||
| 175 | QuadArrayPass::~QuadArrayPass() = default; | ||
| 176 | |||
| 177 | std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { | ||
| 178 | const u32 num_triangle_vertices = (num_vertices / 4) * 6; | ||
| 179 | const std::size_t staging_size = num_triangle_vertices * sizeof(u32); | ||
| 180 | const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | ||
| 181 | |||
| 182 | update_descriptor_queue.Acquire(); | ||
| 183 | update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | ||
| 184 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | ||
| 185 | |||
| 186 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 187 | |||
| 188 | ASSERT(num_vertices % 4 == 0); | ||
| 189 | const u32 num_quads = num_vertices / 4; | ||
| 190 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, | ||
| 191 | num_quads, first, set](vk::CommandBuffer cmdbuf) { | ||
| 192 | constexpr u32 dispatch_size = 1024; | ||
| 193 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | ||
| 194 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | ||
| 195 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first); | ||
| 196 | cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1); | ||
| 197 | |||
| 198 | VkBufferMemoryBarrier barrier; | ||
| 199 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 200 | barrier.pNext = nullptr; | ||
| 201 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 202 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||
| 203 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 204 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 205 | barrier.buffer = buffer; | ||
| 206 | barrier.offset = 0; | ||
| 207 | barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32); | ||
| 208 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 209 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {}); | ||
| 210 | }); | ||
| 211 | return {staging_ref.buffer, 0}; | ||
| 212 | } | ||
| 213 | |||
| 214 | Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, | 141 | Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, |
| 215 | VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, | 142 | VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, |
| 216 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 143 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| @@ -221,18 +148,18 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, | |||
| 221 | 148 | ||
| 222 | Uint8Pass::~Uint8Pass() = default; | 149 | Uint8Pass::~Uint8Pass() = default; |
| 223 | 150 | ||
| 224 | std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, | 151 | std::pair<VkBuffer, u32> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, |
| 225 | u64 src_offset) { | 152 | u32 src_offset) { |
| 226 | const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); | 153 | const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); |
| 227 | const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | 154 | const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); |
| 228 | 155 | ||
| 229 | update_descriptor_queue.Acquire(); | 156 | update_descriptor_queue.Acquire(); |
| 230 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); | 157 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); |
| 231 | update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | 158 | update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size); |
| 232 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 159 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |
| 233 | 160 | ||
| 234 | scheduler.RequestOutsideRenderPassOperationContext(); | 161 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 235 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, | 162 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, |
| 236 | num_vertices](vk::CommandBuffer cmdbuf) { | 163 | num_vertices](vk::CommandBuffer cmdbuf) { |
| 237 | constexpr u32 dispatch_size = 1024; | 164 | constexpr u32 dispatch_size = 1024; |
| 238 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | 165 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| @@ -252,7 +179,7 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff | |||
| 252 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 179 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 253 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | 180 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); |
| 254 | }); | 181 | }); |
| 255 | return {staging_ref.buffer, 0}; | 182 | return {staging.buffer, 0}; |
| 256 | } | 183 | } |
| 257 | 184 | ||
| 258 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | 185 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, |
| @@ -267,9 +194,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | |||
| 267 | 194 | ||
| 268 | QuadIndexedPass::~QuadIndexedPass() = default; | 195 | QuadIndexedPass::~QuadIndexedPass() = default; |
| 269 | 196 | ||
| 270 | std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( | 197 | std::pair<VkBuffer, u32> QuadIndexedPass::Assemble( |
| 271 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, | 198 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, |
| 272 | VkBuffer src_buffer, u64 src_offset) { | 199 | VkBuffer src_buffer, u32 src_offset) { |
| 273 | const u32 index_shift = [index_format] { | 200 | const u32 index_shift = [index_format] { |
| 274 | switch (index_format) { | 201 | switch (index_format) { |
| 275 | case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: | 202 | case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: |
| @@ -286,15 +213,15 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( | |||
| 286 | const u32 num_tri_vertices = (num_vertices / 4) * 6; | 213 | const u32 num_tri_vertices = (num_vertices / 4) * 6; |
| 287 | 214 | ||
| 288 | const std::size_t staging_size = num_tri_vertices * sizeof(u32); | 215 | const std::size_t staging_size = num_tri_vertices * sizeof(u32); |
| 289 | const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | 216 | const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); |
| 290 | 217 | ||
| 291 | update_descriptor_queue.Acquire(); | 218 | update_descriptor_queue.Acquire(); |
| 292 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); | 219 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); |
| 293 | update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | 220 | update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size); |
| 294 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 221 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |
| 295 | 222 | ||
| 296 | scheduler.RequestOutsideRenderPassOperationContext(); | 223 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 297 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, | 224 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, |
| 298 | num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { | 225 | num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { |
| 299 | static constexpr u32 dispatch_size = 1024; | 226 | static constexpr u32 dispatch_size = 1024; |
| 300 | const std::array push_constants = {base_vertex, index_shift}; | 227 | const std::array push_constants = {base_vertex, index_shift}; |
| @@ -317,7 +244,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( | |||
| 317 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 244 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 318 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | 245 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); |
| 319 | }); | 246 | }); |
| 320 | return {staging_ref.buffer, 0}; | 247 | return {staging.buffer, 0}; |
| 321 | } | 248 | } |
| 322 | 249 | ||
| 323 | } // namespace Vulkan | 250 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index f5c6f5f17..4904019f5 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -41,22 +41,6 @@ private: | |||
| 41 | vk::ShaderModule module; | 41 | vk::ShaderModule module; |
| 42 | }; | 42 | }; |
| 43 | 43 | ||
| 44 | class QuadArrayPass final : public VKComputePass { | ||
| 45 | public: | ||
| 46 | explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_, | ||
| 47 | VKDescriptorPool& descriptor_pool_, | ||
| 48 | StagingBufferPool& staging_buffer_pool_, | ||
| 49 | VKUpdateDescriptorQueue& update_descriptor_queue_); | ||
| 50 | ~QuadArrayPass(); | ||
| 51 | |||
| 52 | std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first); | ||
| 53 | |||
| 54 | private: | ||
| 55 | VKScheduler& scheduler; | ||
| 56 | StagingBufferPool& staging_buffer_pool; | ||
| 57 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 58 | }; | ||
| 59 | |||
| 60 | class Uint8Pass final : public VKComputePass { | 44 | class Uint8Pass final : public VKComputePass { |
| 61 | public: | 45 | public: |
| 62 | explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, | 46 | explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, |
| @@ -64,7 +48,9 @@ public: | |||
| 64 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 48 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| 65 | ~Uint8Pass(); | 49 | ~Uint8Pass(); |
| 66 | 50 | ||
| 67 | std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); | 51 | /// Assemble uint8 indices into a uint16 index buffer |
| 52 | /// Returns a pair with the staging buffer and the offset where the assembled data begins | ||
| 53 | std::pair<VkBuffer, u32> Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset); | ||
| 68 | 54 | ||
| 69 | private: | 55 | private: |
| 70 | VKScheduler& scheduler; | 56 | VKScheduler& scheduler; |
| @@ -80,9 +66,9 @@ public: | |||
| 80 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 66 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| 81 | ~QuadIndexedPass(); | 67 | ~QuadIndexedPass(); |
| 82 | 68 | ||
| 83 | std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, | 69 | std::pair<VkBuffer, u32> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, |
| 84 | u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, | 70 | u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, |
| 85 | u64 src_offset); | 71 | u32 src_offset); |
| 86 | 72 | ||
| 87 | private: | 73 | private: |
| 88 | VKScheduler& scheduler; | 74 | VKScheduler& scheduler; |
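The reworked Assemble signatures return a (staging buffer, 32-bit offset) pair so the caller can rebind the converted indices directly. A hedged sketch of a possible call site follows; num_indices, index_buffer, index_offset, uint8_pass and scheduler are assumed context, and the real rebinding happens inside BufferCacheRuntime::BindIndexBuffer, which is not shown in this hunk.

    // Illustrative call site only: widen u8 indices to u16, then bind the converted buffer.
    const auto [converted, converted_offset] =
        uint8_pass.Assemble(num_indices, index_buffer, index_offset);
    scheduler.Record([converted, converted_offset](vk::CommandBuffer cmdbuf) {
        cmdbuf.BindIndexBuffer(converted, converted_offset, VK_INDEX_TYPE_UINT16);
    });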
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 6cd00884d..3bec48d14 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
| @@ -45,8 +45,8 @@ void InnerFence::Wait() { | |||
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 47 | VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 48 | Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, | 48 | TextureCache& texture_cache_, BufferCache& buffer_cache_, |
| 49 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, | 49 | VKQueryCache& query_cache_, const Device& device_, |
| 50 | VKScheduler& scheduler_) | 50 | VKScheduler& scheduler_) |
| 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, | 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, |
| 52 | scheduler{scheduler_} {} | 52 | scheduler{scheduler_} {} |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 9c5e5aa8f..2f8322d29 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -22,7 +22,6 @@ class RasterizerInterface; | |||
| 22 | namespace Vulkan { | 22 | namespace Vulkan { |
| 23 | 23 | ||
| 24 | class Device; | 24 | class Device; |
| 25 | class VKBufferCache; | ||
| 26 | class VKQueryCache; | 25 | class VKQueryCache; |
| 27 | class VKScheduler; | 26 | class VKScheduler; |
| 28 | 27 | ||
| @@ -45,14 +44,14 @@ private: | |||
| 45 | using Fence = std::shared_ptr<InnerFence>; | 44 | using Fence = std::shared_ptr<InnerFence>; |
| 46 | 45 | ||
| 47 | using GenericFenceManager = | 46 | using GenericFenceManager = |
| 48 | VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>; | 47 | VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>; |
| 49 | 48 | ||
| 50 | class VKFenceManager final : public GenericFenceManager { | 49 | class VKFenceManager final : public GenericFenceManager { |
| 51 | public: | 50 | public: |
| 52 | explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 51 | explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, |
| 53 | Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, | 52 | TextureCache& texture_cache, BufferCache& buffer_cache, |
| 54 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, | 53 | VKQueryCache& query_cache, const Device& device, |
| 55 | VKScheduler& scheduler_); | 54 | VKScheduler& scheduler); |
| 56 | 55 | ||
| 57 | protected: | 56 | protected: |
| 58 | Fence CreateFence(u32 value, bool is_stubbed) override; | 57 | Fence CreateFence(u32 value, bool is_stubbed) override; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f0a111829..684d4e3a6 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -8,8 +8,6 @@ | |||
| 8 | #include <mutex> | 8 | #include <mutex> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include <boost/container/static_vector.hpp> | ||
| 12 | |||
| 13 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 14 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 15 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| @@ -24,7 +22,6 @@ | |||
| 24 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 22 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 25 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 23 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 26 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 27 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 29 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 26 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 30 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 27 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| @@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25 | |||
| 50 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); | 47 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); |
| 51 | MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); | 48 | MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); |
| 52 | MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); | 49 | MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); |
| 53 | MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128)); | ||
| 54 | MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128)); | ||
| 55 | MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128)); | ||
| 56 | MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128)); | ||
| 57 | MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128)); | ||
| 58 | MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128)); | ||
| 59 | MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); | 50 | MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); |
| 60 | 51 | ||
| 61 | namespace { | 52 | namespace { |
| 53 | struct DrawParams { | ||
| 54 | u32 base_instance; | ||
| 55 | u32 num_instances; | ||
| 56 | u32 base_vertex; | ||
| 57 | u32 num_vertices; | ||
| 58 | bool is_indexed; | ||
| 59 | }; | ||
| 62 | 60 | ||
| 63 | constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); | 61 | constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); |
| 64 | 62 | ||
| @@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in | |||
| 67 | const float width = src.scale_x * 2.0f; | 65 | const float width = src.scale_x * 2.0f; |
| 68 | const float height = src.scale_y * 2.0f; | 66 | const float height = src.scale_y * 2.0f; |
| 69 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; | 67 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; |
| 70 | |||
| 71 | VkViewport viewport{ | 68 | VkViewport viewport{ |
| 72 | .x = src.translate_x - src.scale_x, | 69 | .x = src.translate_x - src.scale_x, |
| 73 | .y = src.translate_y - src.scale_y, | 70 | .y = src.translate_y - src.scale_y, |
| @@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in | |||
| 76 | .minDepth = src.translate_z - src.scale_z * reduce_z, | 73 | .minDepth = src.translate_z - src.scale_z * reduce_z, |
| 77 | .maxDepth = src.translate_z + src.scale_z, | 74 | .maxDepth = src.translate_z + src.scale_z, |
| 78 | }; | 75 | }; |
| 79 | |||
| 80 | if (!device.IsExtDepthRangeUnrestrictedSupported()) { | 76 | if (!device.IsExtDepthRangeUnrestrictedSupported()) { |
| 81 | viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); | 77 | viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); |
| 82 | viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); | 78 | viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); |
| 83 | } | 79 | } |
| 84 | |||
| 85 | return viewport; | 80 | return viewport; |
| 86 | } | 81 | } |
| 87 | 82 | ||
| @@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const | |||
| 146 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | 141 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); |
| 147 | } | 142 | } |
| 148 | 143 | ||
| 149 | template <size_t N> | ||
| 150 | std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { | ||
| 151 | std::array<VkDeviceSize, N> expanded; | ||
| 152 | std::copy(strides.begin(), strides.end(), expanded.begin()); | ||
| 153 | return expanded; | ||
| 154 | } | ||
| 155 | |||
| 156 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | 144 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { |
| 157 | if (entry.is_buffer) { | 145 | if (entry.is_buffer) { |
| 158 | return ImageViewType::e2D; | 146 | return ImageViewType::e2D; |
| @@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca | |||
| 221 | } | 209 | } |
| 222 | } | 210 | } |
| 223 | 211 | ||
| 224 | } // Anonymous namespace | 212 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, |
| 225 | 213 | bool is_indexed) { | |
| 226 | class BufferBindings final { | 214 | DrawParams params{ |
| 227 | public: | 215 | .base_instance = regs.vb_base_instance, |
| 228 | void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) { | 216 | .num_instances = is_instanced ? num_instances : 1, |
| 229 | vertex.buffers[vertex.num_buffers] = buffer; | 217 | .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, |
| 230 | vertex.offsets[vertex.num_buffers] = offset; | 218 | .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, |
| 231 | vertex.sizes[vertex.num_buffers] = size; | 219 | .is_indexed = is_indexed, |
| 232 | vertex.strides[vertex.num_buffers] = static_cast<u16>(stride); | 220 | }; |
| 233 | ++vertex.num_buffers; | 221 | if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { |
| 234 | } | 222 | // 6 triangle vertices per quad, base vertex is part of the index |
| 235 | 223 | // See BindQuadArrayIndexBuffer for more details | |
| 236 | void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) { | 224 | params.num_vertices = (params.num_vertices / 4) * 6; |
| 237 | index.buffer = buffer; | 225 | params.base_vertex = 0; |
| 238 | index.offset = offset; | 226 | params.is_indexed = true; |
| 239 | index.type = type; | ||
| 240 | } | ||
| 241 | |||
| 242 | void Bind(const Device& device, VKScheduler& scheduler) const { | ||
| 243 | // Use this large switch case to avoid dispatching more memory in the record lambda than | ||
| 244 | // what we need. It looks horrible, but it's the best we can do on standard C++. | ||
| 245 | switch (vertex.num_buffers) { | ||
| 246 | case 0: | ||
| 247 | return BindStatic<0>(device, scheduler); | ||
| 248 | case 1: | ||
| 249 | return BindStatic<1>(device, scheduler); | ||
| 250 | case 2: | ||
| 251 | return BindStatic<2>(device, scheduler); | ||
| 252 | case 3: | ||
| 253 | return BindStatic<3>(device, scheduler); | ||
| 254 | case 4: | ||
| 255 | return BindStatic<4>(device, scheduler); | ||
| 256 | case 5: | ||
| 257 | return BindStatic<5>(device, scheduler); | ||
| 258 | case 6: | ||
| 259 | return BindStatic<6>(device, scheduler); | ||
| 260 | case 7: | ||
| 261 | return BindStatic<7>(device, scheduler); | ||
| 262 | case 8: | ||
| 263 | return BindStatic<8>(device, scheduler); | ||
| 264 | case 9: | ||
| 265 | return BindStatic<9>(device, scheduler); | ||
| 266 | case 10: | ||
| 267 | return BindStatic<10>(device, scheduler); | ||
| 268 | case 11: | ||
| 269 | return BindStatic<11>(device, scheduler); | ||
| 270 | case 12: | ||
| 271 | return BindStatic<12>(device, scheduler); | ||
| 272 | case 13: | ||
| 273 | return BindStatic<13>(device, scheduler); | ||
| 274 | case 14: | ||
| 275 | return BindStatic<14>(device, scheduler); | ||
| 276 | case 15: | ||
| 277 | return BindStatic<15>(device, scheduler); | ||
| 278 | case 16: | ||
| 279 | return BindStatic<16>(device, scheduler); | ||
| 280 | case 17: | ||
| 281 | return BindStatic<17>(device, scheduler); | ||
| 282 | case 18: | ||
| 283 | return BindStatic<18>(device, scheduler); | ||
| 284 | case 19: | ||
| 285 | return BindStatic<19>(device, scheduler); | ||
| 286 | case 20: | ||
| 287 | return BindStatic<20>(device, scheduler); | ||
| 288 | case 21: | ||
| 289 | return BindStatic<21>(device, scheduler); | ||
| 290 | case 22: | ||
| 291 | return BindStatic<22>(device, scheduler); | ||
| 292 | case 23: | ||
| 293 | return BindStatic<23>(device, scheduler); | ||
| 294 | case 24: | ||
| 295 | return BindStatic<24>(device, scheduler); | ||
| 296 | case 25: | ||
| 297 | return BindStatic<25>(device, scheduler); | ||
| 298 | case 26: | ||
| 299 | return BindStatic<26>(device, scheduler); | ||
| 300 | case 27: | ||
| 301 | return BindStatic<27>(device, scheduler); | ||
| 302 | case 28: | ||
| 303 | return BindStatic<28>(device, scheduler); | ||
| 304 | case 29: | ||
| 305 | return BindStatic<29>(device, scheduler); | ||
| 306 | case 30: | ||
| 307 | return BindStatic<30>(device, scheduler); | ||
| 308 | case 31: | ||
| 309 | return BindStatic<31>(device, scheduler); | ||
| 310 | case 32: | ||
| 311 | return BindStatic<32>(device, scheduler); | ||
| 312 | } | ||
| 313 | UNREACHABLE(); | ||
| 314 | } | ||
| 315 | |||
| 316 | private: | ||
| 317 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. | ||
| 318 | struct { | ||
| 319 | size_t num_buffers = 0; | ||
| 320 | std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; | ||
| 321 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; | ||
| 322 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; | ||
| 323 | std::array<u16, Maxwell::NumVertexArrays> strides; | ||
| 324 | } vertex; | ||
| 325 | |||
| 326 | struct { | ||
| 327 | VkBuffer buffer = nullptr; | ||
| 328 | VkDeviceSize offset; | ||
| 329 | VkIndexType type; | ||
| 330 | } index; | ||
| 331 | |||
| 332 | template <size_t N> | ||
| 333 | void BindStatic(const Device& device, VKScheduler& scheduler) const { | ||
| 334 | if (device.IsExtExtendedDynamicStateSupported()) { | ||
| 335 | if (index.buffer) { | ||
| 336 | BindStatic<N, true, true>(scheduler); | ||
| 337 | } else { | ||
| 338 | BindStatic<N, false, true>(scheduler); | ||
| 339 | } | ||
| 340 | } else { | ||
| 341 | if (index.buffer) { | ||
| 342 | BindStatic<N, true, false>(scheduler); | ||
| 343 | } else { | ||
| 344 | BindStatic<N, false, false>(scheduler); | ||
| 345 | } | ||
| 346 | } | ||
| 347 | } | ||
| 348 | |||
| 349 | template <size_t N, bool is_indexed, bool has_extended_dynamic_state> | ||
| 350 | void BindStatic(VKScheduler& scheduler) const { | ||
| 351 | static_assert(N <= Maxwell::NumVertexArrays); | ||
| 352 | if constexpr (N == 0) { | ||
| 353 | return; | ||
| 354 | } | ||
| 355 | |||
| 356 | std::array<VkBuffer, N> buffers; | ||
| 357 | std::array<VkDeviceSize, N> offsets; | ||
| 358 | std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin()); | ||
| 359 | std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); | ||
| 360 | |||
| 361 | if constexpr (has_extended_dynamic_state) { | ||
| 362 | // With extended dynamic states we can specify the length and stride of a vertex buffer | ||
| 363 | std::array<VkDeviceSize, N> sizes; | ||
| 364 | std::array<u16, N> strides; | ||
| 365 | std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin()); | ||
| 366 | std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin()); | ||
| 367 | |||
| 368 | if constexpr (is_indexed) { | ||
| 369 | scheduler.Record( | ||
| 370 | [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) { | ||
| 371 | cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); | ||
| 372 | cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), | ||
| 373 | offsets.data(), sizes.data(), | ||
| 374 | ExpandStrides(strides).data()); | ||
| 375 | }); | ||
| 376 | } else { | ||
| 377 | scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) { | ||
| 378 | cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), | ||
| 379 | offsets.data(), sizes.data(), | ||
| 380 | ExpandStrides(strides).data()); | ||
| 381 | }); | ||
| 382 | } | ||
| 383 | return; | ||
| 384 | } | ||
| 385 | |||
| 386 | if constexpr (is_indexed) { | ||
| 387 | // Indexed draw | ||
| 388 | scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) { | ||
| 389 | cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); | ||
| 390 | cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); | ||
| 391 | }); | ||
| 392 | } else { | ||
| 393 | // Array draw | ||
| 394 | scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) { | ||
| 395 | cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); | ||
| 396 | }); | ||
| 397 | } | ||
| 398 | } | ||
| 399 | }; | ||
| 400 | |||
| 401 | void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { | ||
| 402 | if (is_indexed) { | ||
| 403 | cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance); | ||
| 404 | } else { | ||
| 405 | cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance); | ||
| 406 | } | 227 | } |
| 228 | return params; | ||
| 407 | } | 229 | } |
| 230 | } // Anonymous namespace | ||
| 408 | 231 | ||
| 409 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 232 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 410 | Tegra::MemoryManager& gpu_memory_, | 233 | Tegra::MemoryManager& gpu_memory_, |
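MakeDrawParams above sizes quad draws as (num_vertices / 4) * 6 because each quad {0, 1, 2, 3} is drawn as the two triangles {0, 1, 2} and {0, 2, 3}: four quad vertices expand to six triangle indices, and the base vertex is folded into the index values themselves. A small CPU-side sketch of that expansion is shown below; the GPU-visible lookup table is actually built by BindQuadArrayIndexBuffer elsewhere in this patch, so this helper is illustrative only.

    #include <cstdint>
    #include <vector>

    // Expand a quad-list vertex count into a triangle-list index buffer:
    // every quad contributes the two triangles {0,1,2} and {0,2,3}.
    std::vector<std::uint32_t> MakeQuadToTriangleIndices(std::uint32_t num_quad_vertices) {
        constexpr std::uint32_t pattern[]{0, 1, 2, 0, 2, 3};
        const std::uint32_t num_quads = num_quad_vertices / 4;
        std::vector<std::uint32_t> indices;
        indices.reserve(num_quads * 6);
        for (std::uint32_t quad = 0; quad < num_quads; ++quad) {
            const std::uint32_t base = quad * 4; // the base vertex is baked into the index
            for (const std::uint32_t offset : pattern) {
                indices.push_back(base + offset);
            }
        }
        return indices;
    }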
| @@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 414 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, | 237 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, |
| 415 | gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, | 238 | gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, |
| 416 | screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, | 239 | screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, |
| 417 | state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), | 240 | state_tracker{state_tracker_}, scheduler{scheduler_}, |
| 418 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), | 241 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), |
| 419 | update_descriptor_queue(device, scheduler), | 242 | update_descriptor_queue(device, scheduler), |
| 420 | blit_image(device, scheduler, state_tracker, descriptor_pool), | 243 | blit_image(device, scheduler, state_tracker, descriptor_pool), |
| 421 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 422 | quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 423 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 424 | texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, | 244 | texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, |
| 425 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 245 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |
| 246 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | ||
| 247 | update_descriptor_queue, descriptor_pool), | ||
| 248 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | ||
| 426 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 249 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |
| 427 | descriptor_pool, update_descriptor_queue), | 250 | descriptor_pool, update_descriptor_queue), |
| 428 | buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler, | ||
| 429 | stream_buffer, staging_pool), | ||
| 430 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, | 251 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, |
| 431 | fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler), | 252 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 432 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | 253 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { |
| 433 | scheduler.SetQueryCache(query_cache); | 254 | scheduler.SetQueryCache(query_cache); |
| 434 | if (device.UseAsynchronousShaders()) { | 255 | if (device.UseAsynchronousShaders()) { |
| @@ -449,22 +270,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 449 | GraphicsPipelineCacheKey key; | 270 | GraphicsPipelineCacheKey key; |
| 450 | key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); | 271 | key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); |
| 451 | 272 | ||
| 452 | buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); | 273 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 453 | |||
| 454 | BufferBindings buffer_bindings; | ||
| 455 | const DrawParameters draw_params = | ||
| 456 | SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); | ||
| 457 | 274 | ||
| 458 | auto lock = texture_cache.AcquireLock(); | ||
| 459 | texture_cache.SynchronizeGraphicsDescriptors(); | 275 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 460 | |||
| 461 | texture_cache.UpdateRenderTargets(false); | 276 | texture_cache.UpdateRenderTargets(false); |
| 462 | 277 | ||
| 463 | const auto shaders = pipeline_cache.GetShaders(); | 278 | const auto shaders = pipeline_cache.GetShaders(); |
| 464 | key.shaders = GetShaderAddresses(shaders); | 279 | key.shaders = GetShaderAddresses(shaders); |
| 465 | SetupShaderDescriptors(shaders); | 280 | SetupShaderDescriptors(shaders, is_indexed); |
| 466 | |||
| 467 | buffer_cache.Unmap(); | ||
| 468 | 281 | ||
| 469 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | 282 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); |
| 470 | key.renderpass = framebuffer->RenderPass(); | 283 | key.renderpass = framebuffer->RenderPass(); |
| @@ -476,22 +289,29 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 476 | return; | 289 | return; |
| 477 | } | 290 | } |
| 478 | 291 | ||
| 479 | buffer_bindings.Bind(device, scheduler); | ||
| 480 | |||
| 481 | BeginTransformFeedback(); | 292 | BeginTransformFeedback(); |
| 482 | 293 | ||
| 483 | scheduler.RequestRenderpass(framebuffer); | 294 | scheduler.RequestRenderpass(framebuffer); |
| 484 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | 295 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); |
| 485 | UpdateDynamicStates(); | 296 | UpdateDynamicStates(); |
| 486 | 297 | ||
| 487 | const auto pipeline_layout = pipeline->GetLayout(); | 298 | const auto& regs = maxwell3d.regs; |
| 488 | const auto descriptor_set = pipeline->CommitDescriptorSet(); | 299 | const u32 num_instances = maxwell3d.mme_draw.instance_count; |
| 300 | const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); | ||
| 301 | const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); | ||
| 302 | const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); | ||
| 489 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { | 303 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { |
| 490 | if (descriptor_set) { | 304 | if (descriptor_set) { |
| 491 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, | 305 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, |
| 492 | DESCRIPTOR_SET, descriptor_set, {}); | 306 | DESCRIPTOR_SET, descriptor_set, nullptr); |
| 307 | } | ||
| 308 | if (draw_params.is_indexed) { | ||
| 309 | cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, | ||
| 310 | draw_params.base_vertex, draw_params.base_instance); | ||
| 311 | } else { | ||
| 312 | cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, | ||
| 313 | draw_params.base_vertex, draw_params.base_instance); | ||
| 493 | } | 314 | } |
| 494 | draw_params.Draw(cmdbuf); | ||
| 495 | }); | 315 | }); |
| 496 | 316 | ||
| 497 | EndTransformFeedback(); | 317 | EndTransformFeedback(); |
| @@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() { | |||
| 515 | return; | 335 | return; |
| 516 | } | 336 | } |
| 517 | 337 | ||
| 518 | auto lock = texture_cache.AcquireLock(); | 338 | std::scoped_lock lock{texture_cache.mutex}; |
| 519 | texture_cache.UpdateRenderTargets(true); | 339 | texture_cache.UpdateRenderTargets(true); |
| 520 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | 340 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); |
| 521 | const VkExtent2D render_area = framebuffer->RenderArea(); | 341 | const VkExtent2D render_area = framebuffer->RenderArea(); |
| @@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() { | |||
| 559 | if (use_stencil) { | 379 | if (use_stencil) { |
| 560 | aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; | 380 | aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; |
| 561 | } | 381 | } |
| 562 | |||
| 563 | scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, | 382 | scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, |
| 564 | clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { | 383 | clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { |
| 565 | VkClearAttachment attachment; | 384 | VkClearAttachment attachment; |
| @@ -580,12 +399,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 580 | auto& pipeline = pipeline_cache.GetComputePipeline({ | 399 | auto& pipeline = pipeline_cache.GetComputePipeline({ |
| 581 | .shader = code_addr, | 400 | .shader = code_addr, |
| 582 | .shared_memory_size = launch_desc.shared_alloc, | 401 | .shared_memory_size = launch_desc.shared_alloc, |
| 583 | .workgroup_size = | 402 | .workgroup_size{ |
| 584 | { | 403 | launch_desc.block_dim_x, |
| 585 | launch_desc.block_dim_x, | 404 | launch_desc.block_dim_y, |
| 586 | launch_desc.block_dim_y, | 405 | launch_desc.block_dim_z, |
| 587 | launch_desc.block_dim_z, | 406 | }, |
| 588 | }, | ||
| 589 | }); | 407 | }); |
| 590 | 408 | ||
| 591 | // Compute dispatches can't be executed inside a renderpass | 409 | // Compute dispatches can't be executed inside a renderpass |
| @@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 594 | image_view_indices.clear(); | 412 | image_view_indices.clear(); |
| 595 | sampler_handles.clear(); | 413 | sampler_handles.clear(); |
| 596 | 414 | ||
| 597 | auto lock = texture_cache.AcquireLock(); | 415 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 598 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 599 | 416 | ||
| 600 | const auto& entries = pipeline.GetEntries(); | 417 | const auto& entries = pipeline.GetEntries(); |
| 418 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | ||
| 419 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 420 | u32 ssbo_index = 0; | ||
| 421 | for (const auto& buffer : entries.global_buffers) { | ||
| 422 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||
| 423 | buffer.is_written); | ||
| 424 | ++ssbo_index; | ||
| 425 | } | ||
| 426 | buffer_cache.UpdateComputeBuffers(); | ||
| 427 | |||
| 428 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 429 | |||
| 601 | SetupComputeUniformTexels(entries); | 430 | SetupComputeUniformTexels(entries); |
| 602 | SetupComputeTextures(entries); | 431 | SetupComputeTextures(entries); |
| 603 | SetupComputeStorageTexels(entries); | 432 | SetupComputeStorageTexels(entries); |
| @@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 606 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | 435 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 607 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | 436 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); |
| 608 | 437 | ||
| 609 | buffer_cache.Map(CalculateComputeStreamBufferSize()); | ||
| 610 | |||
| 611 | update_descriptor_queue.Acquire(); | 438 | update_descriptor_queue.Acquire(); |
| 612 | 439 | ||
| 613 | SetupComputeConstBuffers(entries); | 440 | buffer_cache.BindHostComputeBuffers(); |
| 614 | SetupComputeGlobalBuffers(entries); | ||
| 615 | 441 | ||
| 616 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | 442 | ImageViewId* image_view_id_ptr = image_view_ids.data(); |
| 617 | VkSampler* sampler_ptr = sampler_handles.data(); | 443 | VkSampler* sampler_ptr = sampler_handles.data(); |
| 618 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | 444 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, |
| 619 | sampler_ptr); | 445 | sampler_ptr); |
| 620 | 446 | ||
| 621 | buffer_cache.Unmap(); | ||
| 622 | |||
| 623 | const VkPipeline pipeline_handle = pipeline.GetHandle(); | 447 | const VkPipeline pipeline_handle = pipeline.GetHandle(); |
| 624 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); | 448 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); |
| 625 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); | 449 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); |
| @@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
| 644 | query_cache.Query(gpu_addr, type, timestamp); | 468 | query_cache.Query(gpu_addr, type, timestamp); |
| 645 | } | 469 | } |
| 646 | 470 | ||
| 471 | void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||
| 472 | u32 size) { | ||
| 473 | buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size); | ||
| 474 | } | ||
| 475 | |||
| 647 | void RasterizerVulkan::FlushAll() {} | 476 | void RasterizerVulkan::FlushAll() {} |
| 648 | 477 | ||
| 649 | void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | 478 | void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { |
| @@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | |||
| 651 | return; | 480 | return; |
| 652 | } | 481 | } |
| 653 | { | 482 | { |
| 654 | auto lock = texture_cache.AcquireLock(); | 483 | std::scoped_lock lock{texture_cache.mutex}; |
| 655 | texture_cache.DownloadMemory(addr, size); | 484 | texture_cache.DownloadMemory(addr, size); |
| 656 | } | 485 | } |
| 657 | buffer_cache.FlushRegion(addr, size); | 486 | { |
| 487 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 488 | buffer_cache.DownloadMemory(addr, size); | ||
| 489 | } | ||
| 658 | query_cache.FlushRegion(addr, size); | 490 | query_cache.FlushRegion(addr, size); |
| 659 | } | 491 | } |
| 660 | 492 | ||
| 661 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { | 493 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { |
| 494 | std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; | ||
| 662 | if (!Settings::IsGPULevelHigh()) { | 495 | if (!Settings::IsGPULevelHigh()) { |
| 663 | return buffer_cache.MustFlushRegion(addr, size); | 496 | return buffer_cache.IsRegionGpuModified(addr, size); |
| 664 | } | 497 | } |
| 665 | return texture_cache.IsRegionGpuModified(addr, size) || | 498 | return texture_cache.IsRegionGpuModified(addr, size) || |
| 666 | buffer_cache.MustFlushRegion(addr, size); | 499 | buffer_cache.IsRegionGpuModified(addr, size); |
| 667 | } | 500 | } |
| 668 | 501 | ||
| 669 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | 502 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { |
| @@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | |||
| 671 | return; | 504 | return; |
| 672 | } | 505 | } |
| 673 | { | 506 | { |
| 674 | auto lock = texture_cache.AcquireLock(); | 507 | std::scoped_lock lock{texture_cache.mutex}; |
| 675 | texture_cache.WriteMemory(addr, size); | 508 | texture_cache.WriteMemory(addr, size); |
| 676 | } | 509 | } |
| 510 | { | ||
| 511 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 512 | buffer_cache.WriteMemory(addr, size); | ||
| 513 | } | ||
| 677 | pipeline_cache.InvalidateRegion(addr, size); | 514 | pipeline_cache.InvalidateRegion(addr, size); |
| 678 | buffer_cache.InvalidateRegion(addr, size); | ||
| 679 | query_cache.InvalidateRegion(addr, size); | 515 | query_cache.InvalidateRegion(addr, size); |
| 680 | } | 516 | } |
| 681 | 517 | ||
| @@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 683 | if (addr == 0 || size == 0) { | 519 | if (addr == 0 || size == 0) { |
| 684 | return; | 520 | return; |
| 685 | } | 521 | } |
| 522 | pipeline_cache.OnCPUWrite(addr, size); | ||
| 686 | { | 523 | { |
| 687 | auto lock = texture_cache.AcquireLock(); | 524 | std::scoped_lock lock{texture_cache.mutex}; |
| 688 | texture_cache.WriteMemory(addr, size); | 525 | texture_cache.WriteMemory(addr, size); |
| 689 | } | 526 | } |
| 690 | pipeline_cache.OnCPUWrite(addr, size); | 527 | { |
| 691 | buffer_cache.OnCPUWrite(addr, size); | 528 | std::scoped_lock lock{buffer_cache.mutex}; |
| 529 | buffer_cache.CachedWriteMemory(addr, size); | ||
| 530 | } | ||
| 692 | } | 531 | } |
| 693 | 532 | ||
| 694 | void RasterizerVulkan::SyncGuestHost() { | 533 | void RasterizerVulkan::SyncGuestHost() { |
| 695 | buffer_cache.SyncGuestHost(); | ||
| 696 | pipeline_cache.SyncGuestHost(); | 534 | pipeline_cache.SyncGuestHost(); |
| 535 | { | ||
| 536 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 537 | buffer_cache.FlushCachedWrites(); | ||
| 538 | } | ||
| 697 | } | 539 | } |
| 698 | 540 | ||
| 699 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | 541 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { |
| 700 | { | 542 | { |
| 701 | auto lock = texture_cache.AcquireLock(); | 543 | std::scoped_lock lock{texture_cache.mutex}; |
| 702 | texture_cache.UnmapMemory(addr, size); | 544 | texture_cache.UnmapMemory(addr, size); |
| 703 | } | 545 | } |
| 704 | buffer_cache.OnCPUWrite(addr, size); | 546 | { |
| 547 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 548 | buffer_cache.WriteMemory(addr, size); | ||
| 549 | } | ||
| 705 | pipeline_cache.OnCPUWrite(addr, size); | 550 | pipeline_cache.OnCPUWrite(addr, size); |
| 706 | } | 551 | } |
| 707 | 552 | ||
| @@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() { | |||
| 774 | draw_counter = 0; | 619 | draw_counter = 0; |
| 775 | update_descriptor_queue.TickFrame(); | 620 | update_descriptor_queue.TickFrame(); |
| 776 | fence_manager.TickFrame(); | 621 | fence_manager.TickFrame(); |
| 777 | buffer_cache.TickFrame(); | ||
| 778 | staging_pool.TickFrame(); | 622 | staging_pool.TickFrame(); |
| 779 | { | 623 | { |
| 780 | auto lock = texture_cache.AcquireLock(); | 624 | std::scoped_lock lock{texture_cache.mutex}; |
| 781 | texture_cache.TickFrame(); | 625 | texture_cache.TickFrame(); |
| 782 | } | 626 | } |
| 627 | { | ||
| 628 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 629 | buffer_cache.TickFrame(); | ||
| 630 | } | ||
| 783 | } | 631 | } |
| 784 | 632 | ||
| 785 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | 633 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 786 | const Tegra::Engines::Fermi2D::Surface& dst, | 634 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 787 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 635 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 788 | auto lock = texture_cache.AcquireLock(); | 636 | std::scoped_lock lock{texture_cache.mutex}; |
| 789 | texture_cache.BlitImage(dst, src, copy_config); | 637 | texture_cache.BlitImage(dst, src, copy_config); |
| 790 | return true; | 638 | return true; |
| 791 | } | 639 | } |
| @@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 795 | if (!framebuffer_addr) { | 643 | if (!framebuffer_addr) { |
| 796 | return false; | 644 | return false; |
| 797 | } | 645 | } |
| 798 | 646 | std::scoped_lock lock{texture_cache.mutex}; | |
| 799 | auto lock = texture_cache.AcquireLock(); | ||
| 800 | ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); | 647 | ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); |
| 801 | if (!image_view) { | 648 | if (!image_view) { |
| 802 | return false; | 649 | return false; |
| 803 | } | 650 | } |
| 804 | |||
| 805 | screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); | 651 | screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); |
| 806 | screen_info.width = image_view->size.width; | 652 | screen_info.width = image_view->size.width; |
| 807 | screen_info.height = image_view->size.height; | 653 | screen_info.height = image_view->size.height; |
| @@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() { | |||
| 830 | draw_counter = 0; | 676 | draw_counter = 0; |
| 831 | } | 677 | } |
| 832 | 678 | ||
| 833 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, | ||
| 834 | BufferBindings& buffer_bindings, | ||
| 835 | bool is_indexed, | ||
| 836 | bool is_instanced) { | ||
| 837 | MICROPROFILE_SCOPE(Vulkan_Geometry); | ||
| 838 | |||
| 839 | const auto& regs = maxwell3d.regs; | ||
| 840 | |||
| 841 | SetupVertexArrays(buffer_bindings); | ||
| 842 | |||
| 843 | const u32 base_instance = regs.vb_base_instance; | ||
| 844 | const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1; | ||
| 845 | const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; | ||
| 846 | const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; | ||
| 847 | |||
| 848 | DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed}; | ||
| 849 | SetupIndexBuffer(buffer_bindings, params, is_indexed); | ||
| 850 | |||
| 851 | return params; | ||
| 852 | } | ||
| 853 | |||
| 854 | void RasterizerVulkan::SetupShaderDescriptors( | 679 | void RasterizerVulkan::SetupShaderDescriptors( |
| 855 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | 680 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { |
| 856 | image_view_indices.clear(); | 681 | image_view_indices.clear(); |
| 857 | sampler_handles.clear(); | 682 | sampler_handles.clear(); |
| 858 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | 683 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| @@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors( | |||
| 860 | if (!shader) { | 685 | if (!shader) { |
| 861 | continue; | 686 | continue; |
| 862 | } | 687 | } |
| 863 | const auto& entries = shader->GetEntries(); | 688 | const ShaderEntries& entries = shader->GetEntries(); |
| 864 | SetupGraphicsUniformTexels(entries, stage); | 689 | SetupGraphicsUniformTexels(entries, stage); |
| 865 | SetupGraphicsTextures(entries, stage); | 690 | SetupGraphicsTextures(entries, stage); |
| 866 | SetupGraphicsStorageTexels(entries, stage); | 691 | SetupGraphicsStorageTexels(entries, stage); |
| 867 | SetupGraphicsImages(entries, stage); | 692 | SetupGraphicsImages(entries, stage); |
| 693 | |||
| 694 | buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); | ||
| 695 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 696 | u32 ssbo_index = 0; | ||
| 697 | for (const auto& buffer : entries.global_buffers) { | ||
| 698 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 699 | buffer.cbuf_offset, buffer.is_written); | ||
| 700 | ++ssbo_index; | ||
| 701 | } | ||
| 868 | } | 702 | } |
| 869 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | 703 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 704 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 870 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | 705 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); |
| 871 | 706 | ||
| 707 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 708 | |||
| 872 | update_descriptor_queue.Acquire(); | 709 | update_descriptor_queue.Acquire(); |
| 873 | 710 | ||
| 874 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | 711 | ImageViewId* image_view_id_ptr = image_view_ids.data(); |
| @@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors( | |||
| 879 | if (!shader) { | 716 | if (!shader) { |
| 880 | continue; | 717 | continue; |
| 881 | } | 718 | } |
| 882 | const auto& entries = shader->GetEntries(); | 719 | buffer_cache.BindHostStageBuffers(stage); |
| 883 | SetupGraphicsConstBuffers(entries, stage); | 720 | PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, |
| 884 | SetupGraphicsGlobalBuffers(entries, stage); | 721 | image_view_id_ptr, sampler_ptr); |
| 885 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | ||
| 886 | sampler_ptr); | ||
| 887 | } | 722 | } |
| 888 | } | 723 | } |
| 889 | 724 | ||
| @@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() { | |||
| 916 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); | 751 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); |
| 917 | return; | 752 | return; |
| 918 | } | 753 | } |
| 919 | |||
| 920 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | 754 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || |
| 921 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | 755 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || |
| 922 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | 756 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); |
| 923 | 757 | scheduler.Record( | |
| 924 | UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); | 758 | [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
| 925 | UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); | ||
| 926 | UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); | ||
| 927 | |||
| 928 | const auto& binding = regs.tfb_bindings[0]; | ||
| 929 | UNIMPLEMENTED_IF(binding.buffer_enable == 0); | ||
| 930 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 931 | |||
| 932 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 933 | const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size); | ||
| 934 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 935 | |||
| 936 | scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) { | ||
| 937 | cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); | ||
| 938 | cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); | ||
| 939 | }); | ||
| 940 | } | 759 | } |
| 941 | 760 | ||
| 942 | void RasterizerVulkan::EndTransformFeedback() { | 761 | void RasterizerVulkan::EndTransformFeedback() { |
| @@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
| 947 | if (!device.IsExtTransformFeedbackSupported()) { | 766 | if (!device.IsExtTransformFeedbackSupported()) { |
| 948 | return; | 767 | return; |
| 949 | } | 768 | } |
| 950 | |||
| 951 | scheduler.Record( | 769 | scheduler.Record( |
| 952 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | 770 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
| 953 | } | 771 | } |
| 954 | 772 | ||
| 955 | void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { | ||
| 956 | const auto& regs = maxwell3d.regs; | ||
| 957 | |||
| 958 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 959 | const auto& vertex_array = regs.vertex_array[index]; | ||
| 960 | if (!vertex_array.IsEnabled()) { | ||
| 961 | continue; | ||
| 962 | } | ||
| 963 | const GPUVAddr start{vertex_array.StartAddress()}; | ||
| 964 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 965 | |||
| 966 | ASSERT(end >= start); | ||
| 967 | const size_t size = end - start; | ||
| 968 | if (size == 0) { | ||
| 969 | buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); | ||
| 970 | continue; | ||
| 971 | } | ||
| 972 | const auto info = buffer_cache.UploadMemory(start, size); | ||
| 973 | buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride); | ||
| 974 | } | ||
| 975 | } | ||
| 976 | |||
| 977 | void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, | ||
| 978 | bool is_indexed) { | ||
| 979 | if (params.num_vertices == 0) { | ||
| 980 | return; | ||
| 981 | } | ||
| 982 | const auto& regs = maxwell3d.regs; | ||
| 983 | switch (regs.draw.topology) { | ||
| 984 | case Maxwell::PrimitiveTopology::Quads: { | ||
| 985 | if (!params.is_indexed) { | ||
| 986 | const auto [buffer, offset] = | ||
| 987 | quad_array_pass.Assemble(params.num_vertices, params.base_vertex); | ||
| 988 | buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | ||
| 989 | params.base_vertex = 0; | ||
| 990 | params.num_vertices = params.num_vertices * 6 / 4; | ||
| 991 | params.is_indexed = true; | ||
| 992 | break; | ||
| 993 | } | ||
| 994 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||
| 995 | const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||
| 996 | VkBuffer buffer = info.handle; | ||
| 997 | u64 offset = info.offset; | ||
| 998 | std::tie(buffer, offset) = quad_indexed_pass.Assemble( | ||
| 999 | regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); | ||
| 1000 | |||
| 1001 | buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | ||
| 1002 | params.num_vertices = (params.num_vertices / 4) * 6; | ||
| 1003 | params.base_vertex = 0; | ||
| 1004 | break; | ||
| 1005 | } | ||
| 1006 | default: { | ||
| 1007 | if (!is_indexed) { | ||
| 1008 | break; | ||
| 1009 | } | ||
| 1010 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||
| 1011 | const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||
| 1012 | VkBuffer buffer = info.handle; | ||
| 1013 | u64 offset = info.offset; | ||
| 1014 | |||
| 1015 | auto format = regs.index_array.format; | ||
| 1016 | const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; | ||
| 1017 | if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { | ||
| 1018 | std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset); | ||
| 1019 | format = Maxwell::IndexFormat::UnsignedShort; | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format)); | ||
| 1023 | break; | ||
| 1024 | } | ||
| 1025 | } | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) { | ||
| 1029 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 1030 | const auto& shader_stage = maxwell3d.state.shader_stages[stage]; | ||
| 1031 | for (const auto& entry : entries.const_buffers) { | ||
| 1032 | SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); | ||
| 1033 | } | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) { | ||
| 1037 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 1038 | const auto& cbufs{maxwell3d.state.shader_stages[stage]}; | ||
| 1039 | |||
| 1040 | for (const auto& entry : entries.global_buffers) { | ||
| 1041 | const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); | ||
| 1042 | SetupGlobalBuffer(entry, addr); | ||
| 1043 | } | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { | 773 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { |
| 1047 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1048 | const auto& regs = maxwell3d.regs; | 774 | const auto& regs = maxwell3d.regs; |
| 1049 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 775 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1050 | for (const auto& entry : entries.uniform_texels) { | 776 | for (const auto& entry : entries.uniform_texels) { |
| @@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, | |||
| 1054 | } | 780 | } |
| 1055 | 781 | ||
| 1056 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { | 782 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { |
| 1057 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1058 | const auto& regs = maxwell3d.regs; | 783 | const auto& regs = maxwell3d.regs; |
| 1059 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 784 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1060 | for (const auto& entry : entries.samplers) { | 785 | for (const auto& entry : entries.samplers) { |
| @@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_ | |||
| 1070 | } | 795 | } |
| 1071 | 796 | ||
| 1072 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { | 797 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { |
| 1073 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1074 | const auto& regs = maxwell3d.regs; | 798 | const auto& regs = maxwell3d.regs; |
| 1075 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 799 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1076 | for (const auto& entry : entries.storage_texels) { | 800 | for (const auto& entry : entries.storage_texels) { |
| @@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, | |||
| 1080 | } | 804 | } |
| 1081 | 805 | ||
| 1082 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { | 806 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { |
| 1083 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 1084 | const auto& regs = maxwell3d.regs; | 807 | const auto& regs = maxwell3d.regs; |
| 1085 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 808 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1086 | for (const auto& entry : entries.images) { | 809 | for (const auto& entry : entries.images) { |
| @@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t | |||
| 1089 | } | 812 | } |
| 1090 | } | 813 | } |
| 1091 | 814 | ||
| 1092 | void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { | ||
| 1093 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 1094 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 1095 | for (const auto& entry : entries.const_buffers) { | ||
| 1096 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 1097 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | ||
| 1098 | const Tegra::Engines::ConstBufferInfo info{ | ||
| 1099 | .address = config.Address(), | ||
| 1100 | .size = config.size, | ||
| 1101 | .enabled = mask[entry.GetIndex()], | ||
| 1102 | }; | ||
| 1103 | SetupConstBuffer(entry, info); | ||
| 1104 | } | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { | ||
| 1108 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 1109 | const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; | ||
| 1110 | for (const auto& entry : entries.global_buffers) { | ||
| 1111 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 1112 | SetupGlobalBuffer(entry, addr); | ||
| 1113 | } | ||
| 1114 | } | ||
| 1115 | |||
| 1116 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | 815 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { |
| 1117 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1118 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 816 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1119 | for (const auto& entry : entries.uniform_texels) { | 817 | for (const auto& entry : entries.uniform_texels) { |
| 1120 | const TextureHandle handle = | 818 | const TextureHandle handle = |
| @@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | |||
| 1124 | } | 822 | } |
| 1125 | 823 | ||
| 1126 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | 824 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { |
| 1127 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1128 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 825 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1129 | for (const auto& entry : entries.samplers) { | 826 | for (const auto& entry : entries.samplers) { |
| 1130 | for (size_t index = 0; index < entry.size; ++index) { | 827 | for (size_t index = 0; index < entry.size; ++index) { |
| @@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | |||
| 1139 | } | 836 | } |
| 1140 | 837 | ||
| 1141 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | 838 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { |
| 1142 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1143 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 839 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1144 | for (const auto& entry : entries.storage_texels) { | 840 | for (const auto& entry : entries.storage_texels) { |
| 1145 | const TextureHandle handle = | 841 | const TextureHandle handle = |
| @@ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | |||
| 1149 | } | 845 | } |
| 1150 | 846 | ||
| 1151 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | 847 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { |
| 1152 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 1153 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 848 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1154 | for (const auto& entry : entries.images) { | 849 | for (const auto& entry : entries.images) { |
| 1155 | const TextureHandle handle = | 850 | const TextureHandle handle = |
| @@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | |||
| 1158 | } | 853 | } |
| 1159 | } | 854 | } |
| 1160 | 855 | ||
| 1161 | void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 1162 | const Tegra::Engines::ConstBufferInfo& buffer) { | ||
| 1163 | if (!buffer.enabled) { | ||
| 1164 | // Set values to zero to unbind buffers | ||
| 1165 | update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); | ||
| 1166 | return; | ||
| 1167 | } | ||
| 1168 | // Align the size to avoid bad std140 interactions | ||
| 1169 | const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | ||
| 1170 | ASSERT(size <= MaxConstbufferSize); | ||
| 1171 | |||
| 1172 | const u64 alignment = device.GetUniformBufferAlignment(); | ||
| 1173 | const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment); | ||
| 1174 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | ||
| 1175 | } | ||
| 1176 | |||
| 1177 | void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { | ||
| 1178 | const u64 actual_addr = gpu_memory.Read<u64>(address); | ||
| 1179 | const u32 size = gpu_memory.Read<u32>(address + 8); | ||
| 1180 | |||
| 1181 | if (size == 0) { | ||
| 1182 | // Sometimes global memory pointers don't have a proper size. Upload a dummy entry | ||
| 1183 | // because Vulkan doesn't like empty buffers. | ||
| 1184 | // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the | ||
| 1185 | // default buffer. | ||
| 1186 | static constexpr size_t dummy_size = 4; | ||
| 1187 | const auto info = buffer_cache.GetEmptyBuffer(dummy_size); | ||
| 1188 | update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); | ||
| 1189 | return; | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | const auto info = buffer_cache.UploadMemory( | ||
| 1193 | actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); | ||
| 1194 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 856 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 1198 | if (!state_tracker.TouchViewports()) { | 857 | if (!state_tracker.TouchViewports()) { |
| 1199 | return; | 858 | return; |
| @@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg | |||
| 1206 | GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), | 865 | GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), |
| 1207 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), | 866 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), |
| 1208 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), | 867 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), |
| 1209 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; | 868 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15), |
| 869 | }; | ||
| 1210 | scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); | 870 | scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); |
| 1211 | } | 871 | } |
| 1212 | 872 | ||
| @@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs | |||
| 1214 | if (!state_tracker.TouchScissors()) { | 874 | if (!state_tracker.TouchScissors()) { |
| 1215 | return; | 875 | return; |
| 1216 | } | 876 | } |
| 1217 | const std::array scissors = { | 877 | const std::array scissors{ |
| 1218 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), | 878 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), |
| 1219 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), | 879 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), |
| 1220 | GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), | 880 | GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), |
| 1221 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), | 881 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), |
| 1222 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), | 882 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), |
| 1223 | GetScissorState(regs, 15)}; | 883 | GetScissorState(regs, 15), |
| 884 | }; | ||
| 1224 | scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); | 885 | scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); |
| 1225 | } | 886 | } |
| 1226 | 887 | ||
| @@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& | |||
| 1385 | }); | 1046 | }); |
| 1386 | } | 1047 | } |
| 1387 | 1048 | ||
| 1388 | size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { | ||
| 1389 | size_t size = CalculateVertexArraysSize(); | ||
| 1390 | if (is_indexed) { | ||
| 1391 | size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); | ||
| 1392 | } | ||
| 1393 | size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | ||
| 1394 | return size; | ||
| 1395 | } | ||
| 1396 | |||
| 1397 | size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { | ||
| 1398 | return Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 1399 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 1400 | } | ||
| 1401 | |||
| 1402 | size_t RasterizerVulkan::CalculateVertexArraysSize() const { | ||
| 1403 | const auto& regs = maxwell3d.regs; | ||
| 1404 | |||
| 1405 | size_t size = 0; | ||
| 1406 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 1407 | // This implementation assumes that all attributes are used in the shader. | ||
| 1408 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | ||
| 1409 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 1410 | DEBUG_ASSERT(end >= start); | ||
| 1411 | |||
| 1412 | size += (end - start) * regs.vertex_array[index].enable; | ||
| 1413 | } | ||
| 1414 | return size; | ||
| 1415 | } | ||
| 1416 | |||
| 1417 | size_t RasterizerVulkan::CalculateIndexBufferSize() const { | ||
| 1418 | return static_cast<size_t>(maxwell3d.regs.index_array.count) * | ||
| 1419 | static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); | ||
| 1420 | } | ||
| 1421 | |||
| 1422 | size_t RasterizerVulkan::CalculateConstBufferSize( | ||
| 1423 | const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { | ||
| 1424 | if (entry.IsIndirect()) { | ||
| 1425 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 1426 | return buffer.size; | ||
| 1427 | } else { | ||
| 1428 | // Buffer is accessed directly, upload just what we use | ||
| 1429 | return entry.GetSize(); | ||
| 1430 | } | ||
| 1431 | } | ||
| 1432 | |||
| 1433 | VkBuffer RasterizerVulkan::DefaultBuffer() { | ||
| 1434 | if (default_buffer) { | ||
| 1435 | return *default_buffer; | ||
| 1436 | } | ||
| 1437 | default_buffer = device.GetLogical().CreateBuffer({ | ||
| 1438 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 1439 | .pNext = nullptr, | ||
| 1440 | .flags = 0, | ||
| 1441 | .size = DEFAULT_BUFFER_SIZE, | ||
| 1442 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | | ||
| 1443 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, | ||
| 1444 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 1445 | .queueFamilyIndexCount = 0, | ||
| 1446 | .pQueueFamilyIndices = nullptr, | ||
| 1447 | }); | ||
| 1448 | default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal); | ||
| 1449 | |||
| 1450 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 1451 | scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) { | ||
| 1452 | cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0); | ||
| 1453 | }); | ||
| 1454 | return *default_buffer; | ||
| 1455 | } | ||
| 1456 | |||
| 1457 | } // namespace Vulkan | 1049 | } // namespace Vulkan |
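Note on the rasterizer changes above: the old AcquireLock()/OnCPUWrite paths are replaced by direct std::scoped_lock guards on the caches' public mutex members, and per-stage storage buffers are now registered through buffer_cache.BindGraphicsStorageBuffer instead of being uploaded inline. A minimal, self-contained sketch of the locking pattern follows; Cache and OnCpuWrite are placeholder names for illustration, not the actual yuzu types:

    #include <cstdint>
    #include <mutex>

    struct Cache {
        std::mutex mutex; // public, so callers can hold it across several cache calls
        void WriteMemory(std::uintptr_t addr, std::uint64_t size) {
            // invalidate or mark dirty any entries overlapping [addr, addr + size)
        }
    };

    void OnCpuWrite(Cache& texture_cache, Cache& buffer_cache, std::uintptr_t addr, std::uint64_t size) {
        {
            std::scoped_lock lock{texture_cache.mutex}; // released at the end of this block
            texture_cache.WriteMemory(addr, size);
        }
        {
            std::scoped_lock lock{buffer_cache.mutex}; // the caches are locked independently, never nested
            buffer_cache.WriteMemory(addr, size);
        }
    }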
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 8e261b9bd..7fc6741da 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -18,14 +18,12 @@ | |||
| 18 | #include "video_core/renderer_vulkan/blit_image.h" | 18 | #include "video_core/renderer_vulkan/blit_image.h" |
| 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 21 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 21 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 23 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | 22 | #include "video_core/renderer_vulkan/vk_fence_manager.h" |
| 24 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 23 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 25 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 25 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 27 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 26 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 28 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||
| 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 30 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 28 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 31 | #include "video_core/shader/async_shaders.h" | 29 | #include "video_core/shader/async_shaders.h" |
| @@ -49,7 +47,6 @@ namespace Vulkan { | |||
| 49 | struct VKScreenInfo; | 47 | struct VKScreenInfo; |
| 50 | 48 | ||
| 51 | class StateTracker; | 49 | class StateTracker; |
| 52 | class BufferBindings; | ||
| 53 | 50 | ||
| 54 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | 51 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { |
| 55 | public: | 52 | public: |
| @@ -65,6 +62,7 @@ public: | |||
| 65 | void DispatchCompute(GPUVAddr code_addr) override; | 62 | void DispatchCompute(GPUVAddr code_addr) override; |
| 66 | void ResetCounter(VideoCore::QueryType type) override; | 63 | void ResetCounter(VideoCore::QueryType type) override; |
| 67 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 64 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 65 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||
| 68 | void FlushAll() override; | 66 | void FlushAll() override; |
| 69 | void FlushRegion(VAddr addr, u64 size) override; | 67 | void FlushRegion(VAddr addr, u64 size) override; |
| 70 | bool MustFlushRegion(VAddr addr, u64 size) override; | 68 | bool MustFlushRegion(VAddr addr, u64 size) override; |
| @@ -107,24 +105,11 @@ private: | |||
| 107 | 105 | ||
| 108 | static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); | 106 | static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); |
| 109 | 107 | ||
| 110 | struct DrawParameters { | ||
| 111 | void Draw(vk::CommandBuffer cmdbuf) const; | ||
| 112 | |||
| 113 | u32 base_instance = 0; | ||
| 114 | u32 num_instances = 0; | ||
| 115 | u32 base_vertex = 0; | ||
| 116 | u32 num_vertices = 0; | ||
| 117 | bool is_indexed = 0; | ||
| 118 | }; | ||
| 119 | |||
| 120 | void FlushWork(); | 108 | void FlushWork(); |
| 121 | 109 | ||
| 122 | /// Setups geometry buffers and state. | ||
| 123 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, | ||
| 124 | bool is_indexed, bool is_instanced); | ||
| 125 | |||
| 126 | /// Setup descriptors in the graphics pipeline. | 110 | /// Setup descriptors in the graphics pipeline. |
| 127 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); | 111 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, |
| 112 | bool is_indexed); | ||
| 128 | 113 | ||
| 129 | void UpdateDynamicStates(); | 114 | void UpdateDynamicStates(); |
| 130 | 115 | ||
| @@ -132,16 +117,6 @@ private: | |||
| 132 | 117 | ||
| 133 | void EndTransformFeedback(); | 118 | void EndTransformFeedback(); |
| 134 | 119 | ||
| 135 | void SetupVertexArrays(BufferBindings& buffer_bindings); | ||
| 136 | |||
| 137 | void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); | ||
| 138 | |||
| 139 | /// Setup constant buffers in the graphics pipeline. | ||
| 140 | void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 141 | |||
| 142 | /// Setup global buffers in the graphics pipeline. | ||
| 143 | void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 144 | |||
| 145 | /// Setup uniform texels in the graphics pipeline. | 120 | /// Setup uniform texels in the graphics pipeline. |
| 146 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); | 121 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); |
| 147 | 122 | ||
| @@ -154,12 +129,6 @@ private: | |||
| 154 | /// Setup images in the graphics pipeline. | 129 | /// Setup images in the graphics pipeline. |
| 155 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | 130 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); |
| 156 | 131 | ||
| 157 | /// Setup constant buffers in the compute pipeline. | ||
| 158 | void SetupComputeConstBuffers(const ShaderEntries& entries); | ||
| 159 | |||
| 160 | /// Setup global buffers in the compute pipeline. | ||
| 161 | void SetupComputeGlobalBuffers(const ShaderEntries& entries); | ||
| 162 | |||
| 163 | /// Setup texel buffers in the compute pipeline. | 132 | /// Setup texel buffers in the compute pipeline. |
| 164 | void SetupComputeUniformTexels(const ShaderEntries& entries); | 133 | void SetupComputeUniformTexels(const ShaderEntries& entries); |
| 165 | 134 | ||
| @@ -172,11 +141,6 @@ private: | |||
| 172 | /// Setup images in the compute pipeline. | 141 | /// Setup images in the compute pipeline. |
| 173 | void SetupComputeImages(const ShaderEntries& entries); | 142 | void SetupComputeImages(const ShaderEntries& entries); |
| 174 | 143 | ||
| 175 | void SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 176 | const Tegra::Engines::ConstBufferInfo& buffer); | ||
| 177 | |||
| 178 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); | ||
| 179 | |||
| 180 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 144 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 181 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | 145 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 182 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); | 146 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); |
| @@ -193,19 +157,6 @@ private: | |||
| 193 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); | 157 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); |
| 194 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); | 158 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); |
| 195 | 159 | ||
| 196 | size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; | ||
| 197 | |||
| 198 | size_t CalculateComputeStreamBufferSize() const; | ||
| 199 | |||
| 200 | size_t CalculateVertexArraysSize() const; | ||
| 201 | |||
| 202 | size_t CalculateIndexBufferSize() const; | ||
| 203 | |||
| 204 | size_t CalculateConstBufferSize(const ConstBufferEntry& entry, | ||
| 205 | const Tegra::Engines::ConstBufferInfo& buffer) const; | ||
| 206 | |||
| 207 | VkBuffer DefaultBuffer(); | ||
| 208 | |||
| 209 | Tegra::GPU& gpu; | 160 | Tegra::GPU& gpu; |
| 210 | Tegra::MemoryManager& gpu_memory; | 161 | Tegra::MemoryManager& gpu_memory; |
| 211 | Tegra::Engines::Maxwell3D& maxwell3d; | 162 | Tegra::Engines::Maxwell3D& maxwell3d; |
| @@ -217,24 +168,19 @@ private: | |||
| 217 | StateTracker& state_tracker; | 168 | StateTracker& state_tracker; |
| 218 | VKScheduler& scheduler; | 169 | VKScheduler& scheduler; |
| 219 | 170 | ||
| 220 | VKStreamBuffer stream_buffer; | ||
| 221 | StagingBufferPool staging_pool; | 171 | StagingBufferPool staging_pool; |
| 222 | VKDescriptorPool descriptor_pool; | 172 | VKDescriptorPool descriptor_pool; |
| 223 | VKUpdateDescriptorQueue update_descriptor_queue; | 173 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 224 | BlitImageHelper blit_image; | 174 | BlitImageHelper blit_image; |
| 225 | QuadArrayPass quad_array_pass; | ||
| 226 | QuadIndexedPass quad_indexed_pass; | ||
| 227 | Uint8Pass uint8_pass; | ||
| 228 | 175 | ||
| 229 | TextureCacheRuntime texture_cache_runtime; | 176 | TextureCacheRuntime texture_cache_runtime; |
| 230 | TextureCache texture_cache; | 177 | TextureCache texture_cache; |
| 178 | BufferCacheRuntime buffer_cache_runtime; | ||
| 179 | BufferCache buffer_cache; | ||
| 231 | VKPipelineCache pipeline_cache; | 180 | VKPipelineCache pipeline_cache; |
| 232 | VKBufferCache buffer_cache; | ||
| 233 | VKQueryCache query_cache; | 181 | VKQueryCache query_cache; |
| 234 | VKFenceManager fence_manager; | 182 | VKFenceManager fence_manager; |
| 235 | 183 | ||
| 236 | vk::Buffer default_buffer; | ||
| 237 | MemoryCommit default_buffer_commit; | ||
| 238 | vk::Event wfi_event; | 184 | vk::Event wfi_event; |
| 239 | VideoCommon::Shader::AsyncShaders async_shaders; | 185 | VideoCommon::Shader::AsyncShaders async_shaders; |
| 240 | 186 | ||
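In the header, BufferCacheRuntime buffer_cache_runtime is declared immediately before BufferCache buffer_cache. Non-static data members are constructed in declaration order, so this ordering is what guarantees the runtime already exists when the cache is built from it. A tiny sketch of that dependency, with made-up Runtime/Cache/Owner names standing in for the real classes:

    struct Runtime {
        // owns device, scheduler and staging pool handles in the real code
    };

    class Cache {
    public:
        explicit Cache(Runtime& runtime_) : runtime{runtime_} {}

    private:
        Runtime& runtime;
    };

    class Owner {
        Runtime runtime;      // declared first, therefore constructed first
        Cache cache{runtime}; // safe: 'runtime' is fully constructed by now
    };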
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 66004f9c0..f35c120b0 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() { | |||
| 52 | worker_thread.join(); | 52 | worker_thread.join(); |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | u64 VKScheduler::CurrentTick() const noexcept { | ||
| 56 | return master_semaphore->CurrentTick(); | ||
| 57 | } | ||
| 58 | |||
| 59 | bool VKScheduler::IsFree(u64 tick) const noexcept { | ||
| 60 | return master_semaphore->IsFree(tick); | ||
| 61 | } | ||
| 62 | |||
| 63 | void VKScheduler::Wait(u64 tick) { | ||
| 64 | master_semaphore->Wait(tick); | ||
| 65 | } | ||
| 66 | |||
| 67 | void VKScheduler::Flush(VkSemaphore semaphore) { | 55 | void VKScheduler::Flush(VkSemaphore semaphore) { |
| 68 | SubmitExecution(semaphore); | 56 | SubmitExecution(semaphore); |
| 69 | AllocateNewContext(); | 57 | AllocateNewContext(); |
| @@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() { | |||
| 269 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | | 257 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | |
| 270 | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | | 258 | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | |
| 271 | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | 259 | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| 272 | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, | 260 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr, |
| 273 | vk::Span(barriers.data(), num_images)); | 261 | vk::Span(barriers.data(), num_images)); |
| 274 | }); | 262 | }); |
| 275 | state.renderpass = nullptr; | 263 | state.renderpass = nullptr; |
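The end-of-render-pass barrier now uses VK_PIPELINE_STAGE_ALL_COMMANDS_BIT as its destination stage instead of VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, so compute and transfer commands recorded after the pass also wait on the attachment writes. Roughly the same barrier expressed against the plain Vulkan C API, with the image, aspect and access masks simplified for illustration:

    #include <vulkan/vulkan.h>

    void RecordEndOfPassBarrier(VkCommandBuffer cmdbuf, VkImage image, VkImageAspectFlags aspect) {
        const VkImageMemoryBarrier barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
            .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = image,
            .subresourceRange{.aspectMask = aspect, .levelCount = 1, .layerCount = 1},
        };
        // Destination stage covers all later commands, not just graphics stages.
        vkCmdPipelineBarrier(cmdbuf,
                             VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                                 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                                 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                             VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                             0, 0, nullptr, 0, nullptr, 1, &barrier);
    }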
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 15f2987eb..3ce48e9d2 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/alignment.h" | 14 | #include "common/alignment.h" |
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "common/threadsafe_queue.h" | 16 | #include "common/threadsafe_queue.h" |
| 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | ||
| 17 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 18 | 19 | ||
| 19 | namespace Vulkan { | 20 | namespace Vulkan { |
| @@ -21,7 +22,6 @@ namespace Vulkan { | |||
| 21 | class CommandPool; | 22 | class CommandPool; |
| 22 | class Device; | 23 | class Device; |
| 23 | class Framebuffer; | 24 | class Framebuffer; |
| 24 | class MasterSemaphore; | ||
| 25 | class StateTracker; | 25 | class StateTracker; |
| 26 | class VKQueryCache; | 26 | class VKQueryCache; |
| 27 | 27 | ||
| @@ -32,15 +32,6 @@ public: | |||
| 32 | explicit VKScheduler(const Device& device, StateTracker& state_tracker); | 32 | explicit VKScheduler(const Device& device, StateTracker& state_tracker); |
| 33 | ~VKScheduler(); | 33 | ~VKScheduler(); |
| 34 | 34 | ||
| 35 | /// Returns the current command buffer tick. | ||
| 36 | [[nodiscard]] u64 CurrentTick() const noexcept; | ||
| 37 | |||
| 38 | /// Returns true when a tick has been triggered by the GPU. | ||
| 39 | [[nodiscard]] bool IsFree(u64 tick) const noexcept; | ||
| 40 | |||
| 41 | /// Waits for the given tick to trigger on the GPU. | ||
| 42 | void Wait(u64 tick); | ||
| 43 | |||
| 44 | /// Sends the current execution context to the GPU. | 35 | /// Sends the current execution context to the GPU. |
| 45 | void Flush(VkSemaphore semaphore = nullptr); | 36 | void Flush(VkSemaphore semaphore = nullptr); |
| 46 | 37 | ||
| @@ -82,6 +73,21 @@ public: | |||
| 82 | (void)chunk->Record(command); | 73 | (void)chunk->Record(command); |
| 83 | } | 74 | } |
| 84 | 75 | ||
| 76 | /// Returns the current command buffer tick. | ||
| 77 | [[nodiscard]] u64 CurrentTick() const noexcept { | ||
| 78 | return master_semaphore->CurrentTick(); | ||
| 79 | } | ||
| 80 | |||
| 81 | /// Returns true when a tick has been triggered by the GPU. | ||
| 82 | [[nodiscard]] bool IsFree(u64 tick) const noexcept { | ||
| 83 | return master_semaphore->IsFree(tick); | ||
| 84 | } | ||
| 85 | |||
| 86 | /// Waits for the given tick to trigger on the GPU. | ||
| 87 | void Wait(u64 tick) { | ||
| 88 | master_semaphore->Wait(tick); | ||
| 89 | } | ||
| 90 | |||
| 85 | /// Returns the master timeline semaphore. | 91 | /// Returns the master timeline semaphore. |
| 86 | [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { | 92 | [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { |
| 87 | return *master_semaphore; | 93 | return *master_semaphore; |
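CurrentTick(), IsFree() and Wait() move from the .cpp into the header as inline wrappers around the master timeline semaphore, which is why vk_master_semaphore.h is now included instead of forward-declaring MasterSemaphore. The typical caller pattern, sketched with a stand-in TimelineSemaphore type rather than the real scheduler:

    #include <cstdint>

    class TimelineSemaphore {
    public:
        std::uint64_t CurrentTick() const noexcept { return current; }
        bool IsFree(std::uint64_t tick) const noexcept { return tick <= signaled; }
        void Wait(std::uint64_t tick) { /* block until 'signaled' reaches 'tick' */ }

    private:
        std::uint64_t current = 1;  // tick of the submission being recorded
        std::uint64_t signaled = 0; // last tick the GPU has finished
    };

    struct Resource {
        std::uint64_t tick = 0; // tick of the submission that last used this resource
    };

    void Reuse(TimelineSemaphore& semaphore, Resource& resource) {
        if (!semaphore.IsFree(resource.tick)) {
            semaphore.Wait(resource.tick); // stall only while the GPU still owns it
        }
        resource.tick = semaphore.CurrentTick(); // stamp it for the new submission
    }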
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 61d52b961..e165a6987 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -3127,6 +3127,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 3127 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); | 3127 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); |
| 3128 | } | 3128 | } |
| 3129 | } | 3129 | } |
| 3130 | for (const auto& buffer : entries.const_buffers) { | ||
| 3131 | entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||
| 3132 | } | ||
| 3130 | entries.clip_distances = ir.GetClipDistances(); | 3133 | entries.clip_distances = ir.GetClipDistances(); |
| 3131 | entries.shader_length = ir.GetLength(); | 3134 | entries.shader_length = ir.GetLength(); |
| 3132 | entries.uses_warps = ir.UsesWarps(); | 3135 | entries.uses_warps = ir.UsesWarps(); |
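GenerateShaderEntries now folds every const buffer index into the single enabled_uniform_buffers bitmask that the rasterizer later hands to buffer_cache.SetEnabledUniformBuffers. A self-contained illustration of building and walking such a mask; the helper names and the use of std::countr_zero are assumptions made for the example:

    #include <bit>
    #include <cstdint>
    #include <vector>

    std::uint32_t BuildMask(const std::vector<std::uint32_t>& const_buffer_indices) {
        std::uint32_t mask = 0;
        for (const std::uint32_t index : const_buffer_indices) {
            mask |= 1U << index; // one bit per bound uniform buffer slot
        }
        return mask;
    }

    void ForEachEnabled(std::uint32_t mask) {
        while (mask != 0) {
            const int index = std::countr_zero(mask); // lowest enabled slot
            mask &= mask - 1;                         // clear that bit and continue
            // ... bind uniform buffer 'index' for this stage ...
        }
    }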
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 26381e444..5d94132a5 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -39,24 +39,7 @@ private: | |||
| 39 | u32 index{}; | 39 | u32 index{}; |
| 40 | }; | 40 | }; |
| 41 | 41 | ||
| 42 | class GlobalBufferEntry { | 42 | struct GlobalBufferEntry { |
| 43 | public: | ||
| 44 | constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_) | ||
| 45 | : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {} | ||
| 46 | |||
| 47 | constexpr u32 GetCbufIndex() const { | ||
| 48 | return cbuf_index; | ||
| 49 | } | ||
| 50 | |||
| 51 | constexpr u32 GetCbufOffset() const { | ||
| 52 | return cbuf_offset; | ||
| 53 | } | ||
| 54 | |||
| 55 | constexpr bool IsWritten() const { | ||
| 56 | return is_written; | ||
| 57 | } | ||
| 58 | |||
| 59 | private: | ||
| 60 | u32 cbuf_index{}; | 43 | u32 cbuf_index{}; |
| 61 | u32 cbuf_offset{}; | 44 | u32 cbuf_offset{}; |
| 62 | bool is_written{}; | 45 | bool is_written{}; |
| @@ -78,6 +61,7 @@ struct ShaderEntries { | |||
| 78 | std::set<u32> attributes; | 61 | std::set<u32> attributes; |
| 79 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 62 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 80 | std::size_t shader_length{}; | 63 | std::size_t shader_length{}; |
| 64 | u32 enabled_uniform_buffers{}; | ||
| 81 | bool uses_warps{}; | 65 | bool uses_warps{}; |
| 82 | }; | 66 | }; |
| 83 | 67 | ||
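GlobalBufferEntry loses its constructor and getters and becomes a plain aggregate, so consumers read .cbuf_index, .cbuf_offset and .is_written directly (as the rasterizer hunk above already does) and can build entries with designated initializers. A minimal sketch of the resulting shape, trimmed down from the real header; the example values are made up:

    #include <cstdint>

    struct GlobalBufferEntry {
        std::uint32_t cbuf_index{};
        std::uint32_t cbuf_offset{};
        bool is_written{};
    };

    // Aggregate initialization replaces the old explicit constructor and Get*() accessors.
    constexpr GlobalBufferEntry entry{.cbuf_index = 3, .cbuf_offset = 0x40, .is_written = true};
    static_assert(entry.cbuf_index == 3 && entry.is_written);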
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 1779a2e30..e81fad007 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp | |||
| @@ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table; | |||
| 30 | using Flags = Maxwell3D::DirtyState::Flags; | 30 | using Flags = Maxwell3D::DirtyState::Flags; |
| 31 | 31 | ||
| 32 | Flags MakeInvalidationFlags() { | 32 | Flags MakeInvalidationFlags() { |
| 33 | static constexpr std::array INVALIDATION_FLAGS{ | 33 | static constexpr int INVALIDATION_FLAGS[]{ |
| 34 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, | 34 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, |
| 35 | StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, | 35 | StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, |
| 36 | DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, | 36 | DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, |
| 37 | }; | 37 | }; |
| 38 | Flags flags{}; | 38 | Flags flags{}; |
| 39 | for (const int flag : INVALIDATION_FLAGS) { | 39 | for (const int flag : INVALIDATION_FLAGS) { |
| 40 | flags[flag] = true; | 40 | flags[flag] = true; |
| 41 | } | 41 | } |
| 42 | for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { | ||
| 43 | flags[index] = true; | ||
| 44 | } | ||
| 42 | return flags; | 45 | return flags; |
| 43 | } | 46 | } |
| 44 | 47 | ||
| @@ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) { | |||
| 130 | StateTracker::StateTracker(Tegra::GPU& gpu) | 133 | StateTracker::StateTracker(Tegra::GPU& gpu) |
| 131 | : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { | 134 | : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { |
| 132 | auto& tables = gpu.Maxwell3D().dirty.tables; | 135 | auto& tables = gpu.Maxwell3D().dirty.tables; |
| 133 | SetupDirtyRenderTargets(tables); | 136 | SetupDirtyFlags(tables); |
| 134 | SetupDirtyViewports(tables); | 137 | SetupDirtyViewports(tables); |
| 135 | SetupDirtyScissors(tables); | 138 | SetupDirtyScissors(tables); |
| 136 | SetupDirtyDepthBias(tables); | 139 | SetupDirtyDepthBias(tables); |
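MakeInvalidationFlags additionally marks VertexBuffers and every VertexBuffer0..VertexBuffer31 bit dirty, and the constructor now calls SetupDirtyFlags, so a state invalidation forces vertex buffers to be rebound through the new buffer cache. The flag-table idiom, sketched with a std::bitset and made-up flag indices in place of Maxwell3D's real dirty-state table:

    #include <bitset>

    enum : int {
        Viewports,
        Scissors,
        VertexBuffers,
        VertexBuffer0 = 8,
        VertexBuffer31 = VertexBuffer0 + 31,
        NumFlags,
    };
    using Flags = std::bitset<NumFlags>;

    Flags MakeInvalidationFlags() {
        static constexpr int INVALIDATION_FLAGS[]{Viewports, Scissors, VertexBuffers};
        Flags flags{};
        for (const int flag : INVALIDATION_FLAGS) {
            flags[flag] = true;
        }
        for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
            flags[index] = true; // each vertex buffer slot carries its own dirty bit
        }
        return flags;
    }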
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index aa7c5d7c6..1eeb45ca9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -426,46 +426,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 426 | void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, | 426 | void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, |
| 427 | VkImageAspectFlags aspect_mask, bool is_initialized, | 427 | VkImageAspectFlags aspect_mask, bool is_initialized, |
| 428 | std::span<const VkBufferImageCopy> copies) { | 428 | std::span<const VkBufferImageCopy> copies) { |
| 429 | static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | | 429 | static constexpr VkAccessFlags WRITE_ACCESS_FLAGS = |
| 430 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | 430 | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 431 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; | 431 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; |
| 432 | static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT | | ||
| 433 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||
| 434 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; | ||
| 432 | const VkImageMemoryBarrier read_barrier{ | 435 | const VkImageMemoryBarrier read_barrier{ |
| 433 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 436 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 434 | .pNext = nullptr, | 437 | .pNext = nullptr, |
| 435 | .srcAccessMask = ACCESS_FLAGS, | 438 | .srcAccessMask = WRITE_ACCESS_FLAGS, |
| 436 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 439 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 437 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, | 440 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, |
| 438 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | 441 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| 439 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 442 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 440 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 443 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 441 | .image = image, | 444 | .image = image, |
| 442 | .subresourceRange = | 445 | .subresourceRange{ |
| 443 | { | 446 | .aspectMask = aspect_mask, |
| 444 | .aspectMask = aspect_mask, | 447 | .baseMipLevel = 0, |
| 445 | .baseMipLevel = 0, | 448 | .levelCount = VK_REMAINING_MIP_LEVELS, |
| 446 | .levelCount = VK_REMAINING_MIP_LEVELS, | 449 | .baseArrayLayer = 0, |
| 447 | .baseArrayLayer = 0, | 450 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 448 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 451 | }, |
| 449 | }, | ||
| 450 | }; | 452 | }; |
| 451 | const VkImageMemoryBarrier write_barrier{ | 453 | const VkImageMemoryBarrier write_barrier{ |
| 452 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 454 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 453 | .pNext = nullptr, | 455 | .pNext = nullptr, |
| 454 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 456 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 455 | .dstAccessMask = ACCESS_FLAGS, | 457 | .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS, |
| 456 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | 458 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| 457 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | 459 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 458 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 460 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 459 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 461 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 460 | .image = image, | 462 | .image = image, |
| 461 | .subresourceRange = | 463 | .subresourceRange{ |
| 462 | { | 464 | .aspectMask = aspect_mask, |
| 463 | .aspectMask = aspect_mask, | 465 | .baseMipLevel = 0, |
| 464 | .baseMipLevel = 0, | 466 | .levelCount = VK_REMAINING_MIP_LEVELS, |
| 465 | .levelCount = VK_REMAINING_MIP_LEVELS, | 467 | .baseArrayLayer = 0, |
| 466 | .baseArrayLayer = 0, | 468 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 467 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 469 | }, |
| 468 | }, | ||
| 469 | }; | 470 | }; |
| 470 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, | 471 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, |
| 471 | read_barrier); | 472 | read_barrier); |
| @@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() { | |||
| 569 | scheduler.Finish(); | 570 | scheduler.Finish(); |
| 570 | } | 571 | } |
| 571 | 572 | ||
| 572 | ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { | 573 | StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) { |
| 573 | const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload); | 574 | return staging_buffer_pool.Request(size, MemoryUsage::Upload); |
| 574 | return { | ||
| 575 | .handle = staging_ref.buffer, | ||
| 576 | .span = staging_ref.mapped_span, | ||
| 577 | }; | ||
| 578 | } | 575 | } |
| 579 | 576 | ||
| 580 | ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { | 577 | StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { |
| 581 | const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download); | 578 | return staging_buffer_pool.Request(size, MemoryUsage::Download); |
| 582 | return { | ||
| 583 | .handle = staging_ref.buffer, | ||
| 584 | .span = staging_ref.mapped_span, | ||
| 585 | }; | ||
| 586 | } | 579 | } |
| 587 | 580 | ||
| 588 | void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 581 | void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| @@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | |||
| 754 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | 747 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 755 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | 748 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | |
| 756 | VK_ACCESS_TRANSFER_WRITE_BIT, | 749 | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 757 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 750 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, |
| 758 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | 751 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 759 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | 752 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 760 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 753 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| @@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | |||
| 765 | VkImageMemoryBarrier{ | 758 | VkImageMemoryBarrier{ |
| 766 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 759 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 767 | .pNext = nullptr, | 760 | .pNext = nullptr, |
| 768 | .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | 761 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 769 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||
| 770 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 771 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 772 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | 762 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | |
| 773 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | 763 | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 774 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 764 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 775 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | 765 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 776 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | 766 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| @@ -828,12 +818,12 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 828 | } | 818 | } |
| 829 | } | 819 | } |
| 830 | 820 | ||
| 831 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 821 | void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, |
| 832 | std::span<const BufferImageCopy> copies) { | 822 | std::span<const BufferImageCopy> copies) { |
| 833 | // TODO: Move this to another API | 823 | // TODO: Move this to another API |
| 834 | scheduler->RequestOutsideRenderPassOperationContext(); | 824 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 835 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); | 825 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); |
| 836 | const VkBuffer src_buffer = map.handle; | 826 | const VkBuffer src_buffer = map.buffer; |
| 837 | const VkImage vk_image = *image; | 827 | const VkImage vk_image = *image; |
| 838 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; | 828 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; |
| 839 | const bool is_initialized = std::exchange(initialized, true); | 829 | const bool is_initialized = std::exchange(initialized, true); |
| @@ -843,12 +833,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | |||
| 843 | }); | 833 | }); |
| 844 | } | 834 | } |
| 845 | 835 | ||
| 846 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 836 | void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, |
| 847 | std::span<const VideoCommon::BufferCopy> copies) { | 837 | std::span<const VideoCommon::BufferCopy> copies) { |
| 848 | // TODO: Move this to another API | 838 | // TODO: Move this to another API |
| 849 | scheduler->RequestOutsideRenderPassOperationContext(); | 839 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 850 | std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); | 840 | std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); |
| 851 | const VkBuffer src_buffer = map.handle; | 841 | const VkBuffer src_buffer = map.buffer; |
| 852 | const VkBuffer dst_buffer = *buffer; | 842 | const VkBuffer dst_buffer = *buffer; |
| 853 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { | 843 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { |
| 854 | // TODO: Barriers | 844 | // TODO: Barriers |
| @@ -856,13 +846,58 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | |||
| 856 | }); | 846 | }); |
| 857 | } | 847 | } |
| 858 | 848 | ||
| 859 | void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, | 849 | void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset, |
| 860 | std::span<const BufferImageCopy> copies) { | 850 | std::span<const BufferImageCopy> copies) { |
| 861 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); | 851 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); |
| 862 | scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, | 852 | scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, |
| 863 | vk_copies](vk::CommandBuffer cmdbuf) { | 853 | vk_copies](vk::CommandBuffer cmdbuf) { |
| 864 | // TODO: Barriers | 854 | const VkImageMemoryBarrier read_barrier{ |
| 865 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); | 855 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 856 | .pNext = nullptr, | ||
| 857 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 858 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 859 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 860 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 861 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 862 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 863 | .image = image, | ||
| 864 | .subresourceRange{ | ||
| 865 | .aspectMask = aspect_mask, | ||
| 866 | .baseMipLevel = 0, | ||
| 867 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 868 | .baseArrayLayer = 0, | ||
| 869 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 870 | }, | ||
| 871 | }; | ||
| 872 | const VkImageMemoryBarrier image_write_barrier{ | ||
| 873 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 874 | .pNext = nullptr, | ||
| 875 | .srcAccessMask = 0, | ||
| 876 | .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 877 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 878 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 879 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 880 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 881 | .image = image, | ||
| 882 | .subresourceRange{ | ||
| 883 | .aspectMask = aspect_mask, | ||
| 884 | .baseMipLevel = 0, | ||
| 885 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 886 | .baseArrayLayer = 0, | ||
| 887 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 888 | }, | ||
| 889 | }; | ||
| 890 | const VkMemoryBarrier memory_write_barrier{ | ||
| 891 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 892 | .pNext = nullptr, | ||
| 893 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 894 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 895 | }; | ||
| 896 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 897 | 0, read_barrier); | ||
| 898 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies); | ||
| 899 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 900 | 0, memory_write_barrier, nullptr, image_write_barrier); | ||
| 866 | }); | 901 | }); |
| 867 | } | 902 | } |
| 868 | 903 | ||
| @@ -1127,7 +1162,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1127 | .pAttachments = attachments.data(), | 1162 | .pAttachments = attachments.data(), |
| 1128 | .width = key.size.width, | 1163 | .width = key.size.width, |
| 1129 | .height = key.size.height, | 1164 | .height = key.size.height, |
| 1130 | .layers = static_cast<u32>(num_layers), | 1165 | .layers = static_cast<u32>(std::max(num_layers, 1)), |
| 1131 | }); | 1166 | }); |
| 1132 | if (runtime.device.HasDebuggingToolAttached()) { | 1167 | if (runtime.device.HasDebuggingToolAttached()) { |
| 1133 | framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); | 1168 | framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); |
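Note on the vk_texture_cache.cpp hunks above: Image::DownloadMemory no longer leaves the copy unsynchronized behind a "TODO: Barriers" comment. It now transitions the image from GENERAL to TRANSFER_SRC_OPTIMAL before the copy, then returns it to GENERAL while making the transfer result visible to later accesses. The sketch below mirrors that ordering with raw Vulkan calls for illustration only; the function name and parameters are hypothetical, and the patch itself records this through the scheduler and the vk::CommandBuffer wrapper.

#include <vulkan/vulkan.h>

// Illustrative sketch of the barrier ordering used in the DownloadMemory hunk.
void RecordImageDownload(VkCommandBuffer cmd, VkImage image, VkBuffer buffer,
                         VkImageAspectFlags aspect, const VkBufferImageCopy& region) {
    VkImageMemoryBarrier to_src{};
    to_src.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    to_src.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;   // make prior GPU writes available...
    to_src.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;  // ...to the transfer read below
    to_src.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
    to_src.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
    to_src.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    to_src.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    to_src.image = image;
    to_src.subresourceRange = {aspect, 0, VK_REMAINING_MIP_LEVELS, 0, VK_REMAINING_ARRAY_LAYERS};
    vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         0, 0, nullptr, 0, nullptr, 1, &to_src);

    vkCmdCopyImageToBuffer(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, 1, &region);

    // Restore GENERAL and make the transfer result visible to any later access.
    VkImageMemoryBarrier back_to_general = to_src;
    back_to_general.srcAccessMask = 0;
    back_to_general.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
    back_to_general.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
    back_to_general.newLayout = VK_IMAGE_LAYOUT_GENERAL;
    VkMemoryBarrier flush{};
    flush.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
    flush.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
    flush.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
    vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         0, 1, &flush, 0, nullptr, 1, &back_to_general);
}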
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 8d29361a1..4558c3297 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <compare> | 7 | #include <compare> |
| 8 | #include <span> | 8 | #include <span> |
| 9 | 9 | ||
| 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 10 | #include "video_core/texture_cache/texture_cache.h" | 11 | #include "video_core/texture_cache/texture_cache.h" |
| 11 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 12 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| @@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> { | |||
| 53 | 54 | ||
| 54 | namespace Vulkan { | 55 | namespace Vulkan { |
| 55 | 56 | ||
| 56 | struct ImageBufferMap { | ||
| 57 | [[nodiscard]] VkBuffer Handle() const noexcept { | ||
| 58 | return handle; | ||
| 59 | } | ||
| 60 | |||
| 61 | [[nodiscard]] std::span<u8> Span() const noexcept { | ||
| 62 | return span; | ||
| 63 | } | ||
| 64 | |||
| 65 | VkBuffer handle; | ||
| 66 | std::span<u8> span; | ||
| 67 | }; | ||
| 68 | |||
| 69 | struct TextureCacheRuntime { | 57 | struct TextureCacheRuntime { |
| 70 | const Device& device; | 58 | const Device& device; |
| 71 | VKScheduler& scheduler; | 59 | VKScheduler& scheduler; |
| @@ -76,9 +64,9 @@ struct TextureCacheRuntime { | |||
| 76 | 64 | ||
| 77 | void Finish(); | 65 | void Finish(); |
| 78 | 66 | ||
| 79 | [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); | 67 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); |
| 80 | 68 | ||
| 81 | [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size); | 69 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); |
| 82 | 70 | ||
| 83 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 71 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 84 | const std::array<Offset2D, 2>& dst_region, | 72 | const std::array<Offset2D, 2>& dst_region, |
| @@ -94,7 +82,7 @@ struct TextureCacheRuntime { | |||
| 94 | return false; | 82 | return false; |
| 95 | } | 83 | } |
| 96 | 84 | ||
| 97 | void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, | 85 | void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t, |
| 98 | std::span<const VideoCommon::SwizzleParameters>) { | 86 | std::span<const VideoCommon::SwizzleParameters>) { |
| 99 | UNREACHABLE(); | 87 | UNREACHABLE(); |
| 100 | } | 88 | } |
| @@ -112,13 +100,13 @@ public: | |||
| 112 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | 100 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, |
| 113 | VAddr cpu_addr); | 101 | VAddr cpu_addr); |
| 114 | 102 | ||
| 115 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 103 | void UploadMemory(const StagingBufferRef& map, size_t buffer_offset, |
| 116 | std::span<const VideoCommon::BufferImageCopy> copies); | 104 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 117 | 105 | ||
| 118 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 106 | void UploadMemory(const StagingBufferRef& map, size_t buffer_offset, |
| 119 | std::span<const VideoCommon::BufferCopy> copies); | 107 | std::span<const VideoCommon::BufferCopy> copies); |
| 120 | 108 | ||
| 121 | void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, | 109 | void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset, |
| 122 | std::span<const VideoCommon::BufferImageCopy> copies); | 110 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 123 | 111 | ||
| 124 | [[nodiscard]] VkImage Handle() const noexcept { | 112 | [[nodiscard]] VkImage Handle() const noexcept { |
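Note on the vk_texture_cache.h hunks above: with the ImageBufferMap wrapper removed, the texture cache takes StagingBufferRef straight from the staging pool, so callers read the public buffer and mapped_span members instead of the old Handle()/Span() accessors. A minimal sketch of an assumed caller follows; runtime, image, copies, and guest_data are illustrative names, not taken from the patch.

const StagingBufferRef ref = runtime.UploadStagingBuffer(MapSizeBytes(image));
std::memcpy(ref.mapped_span.data(), guest_data.data(), guest_data.size_bytes());
image.UploadMemory(ref, 0, copies);  // the VkBuffer travels along as ref.buffer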
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 0dbb1a31f..7fdff6e56 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h | |||
| @@ -9,16 +9,7 @@ | |||
| 9 | #include <shared_mutex> | 9 | #include <shared_mutex> |
| 10 | #include <thread> | 10 | #include <thread> |
| 11 | 11 | ||
| 12 | // This header includes both Vulkan and OpenGL headers, this has to be fixed | ||
| 13 | // Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues. | ||
| 14 | // Forcefully include glad early and undefine macros | ||
| 15 | #include <glad/glad.h> | 12 | #include <glad/glad.h> |
| 16 | #ifdef CreateEvent | ||
| 17 | #undef CreateEvent | ||
| 18 | #endif | ||
| 19 | #ifdef CreateSemaphore | ||
| 20 | #undef CreateSemaphore | ||
| 21 | #endif | ||
| 22 | 13 | ||
| 23 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 24 | #include "video_core/renderer_opengl/gl_device.h" | 15 | #include "video_core/renderer_opengl/gl_device.h" |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d3ea07aac..5f88537bc 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -76,6 +76,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 76 | case SystemVariable::InvocationId: | 76 | case SystemVariable::InvocationId: |
| 77 | return Operation(OperationCode::InvocationId); | 77 | return Operation(OperationCode::InvocationId); |
| 78 | case SystemVariable::Ydirection: | 78 | case SystemVariable::Ydirection: |
| 79 | uses_y_negate = true; | ||
| 79 | return Operation(OperationCode::YNegate); | 80 | return Operation(OperationCode::YNegate); |
| 80 | case SystemVariable::InvocationInfo: | 81 | case SystemVariable::InvocationInfo: |
| 81 | LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); | 82 | LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0c6ab0f07..1cd7c14d7 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -139,6 +139,10 @@ public: | |||
| 139 | return uses_legacy_varyings; | 139 | return uses_legacy_varyings; |
| 140 | } | 140 | } |
| 141 | 141 | ||
| 142 | bool UsesYNegate() const { | ||
| 143 | return uses_y_negate; | ||
| 144 | } | ||
| 145 | |||
| 142 | bool UsesWarps() const { | 146 | bool UsesWarps() const { |
| 143 | return uses_warps; | 147 | return uses_warps; |
| 144 | } | 148 | } |
| @@ -465,6 +469,7 @@ private: | |||
| 465 | bool uses_instance_id{}; | 469 | bool uses_instance_id{}; |
| 466 | bool uses_vertex_id{}; | 470 | bool uses_vertex_id{}; |
| 467 | bool uses_legacy_varyings{}; | 471 | bool uses_legacy_varyings{}; |
| 472 | bool uses_y_negate{}; | ||
| 468 | bool uses_warps{}; | 473 | bool uses_warps{}; |
| 469 | bool uses_indexed_samplers{}; | 474 | bool uses_indexed_samplers{}; |
| 470 | 475 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d1080300f..f336b705f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -103,9 +103,6 @@ public: | |||
| 103 | /// Notify the cache that a new frame has been queued | 103 | /// Notify the cache that a new frame has been queued |
| 104 | void TickFrame(); | 104 | void TickFrame(); |
| 105 | 105 | ||
| 106 | /// Return an unique mutually exclusive lock for the cache | ||
| 107 | [[nodiscard]] std::unique_lock<std::mutex> AcquireLock(); | ||
| 108 | |||
| 109 | /// Return a constant reference to the given image view id | 106 | /// Return a constant reference to the given image view id |
| 110 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; | 107 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; |
| 111 | 108 | ||
| @@ -179,6 +176,8 @@ public: | |||
| 179 | /// Return true when a CPU region is modified from the GPU | 176 | /// Return true when a CPU region is modified from the GPU |
| 180 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 177 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
| 181 | 178 | ||
| 179 | std::mutex mutex; | ||
| 180 | |||
| 182 | private: | 181 | private: |
| 183 | /// Iterate over all page indices in a range | 182 | /// Iterate over all page indices in a range |
| 184 | template <typename Func> | 183 | template <typename Func> |
| @@ -212,8 +211,8 @@ private: | |||
| 212 | void RefreshContents(Image& image); | 211 | void RefreshContents(Image& image); |
| 213 | 212 | ||
| 214 | /// Upload data from guest to an image | 213 | /// Upload data from guest to an image |
| 215 | template <typename MapBuffer> | 214 | template <typename StagingBuffer> |
| 216 | void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); | 215 | void UploadImageContents(Image& image, StagingBuffer& staging_buffer, size_t buffer_offset); |
| 217 | 216 | ||
| 218 | /// Find or create an image view from a guest descriptor | 217 | /// Find or create an image view from a guest descriptor |
| 219 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); | 218 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); |
| @@ -325,8 +324,6 @@ private: | |||
| 325 | 324 | ||
| 326 | RenderTargets render_targets; | 325 | RenderTargets render_targets; |
| 327 | 326 | ||
| 328 | std::mutex mutex; | ||
| 329 | |||
| 330 | std::unordered_map<TICEntry, ImageViewId> image_views; | 327 | std::unordered_map<TICEntry, ImageViewId> image_views; |
| 331 | std::unordered_map<TSCEntry, SamplerId> samplers; | 328 | std::unordered_map<TSCEntry, SamplerId> samplers; |
| 332 | std::unordered_map<RenderTargets, FramebufferId> framebuffers; | 329 | std::unordered_map<RenderTargets, FramebufferId> framebuffers; |
| @@ -386,11 +383,6 @@ void TextureCache<P>::TickFrame() { | |||
| 386 | } | 383 | } |
| 387 | 384 | ||
| 388 | template <class P> | 385 | template <class P> |
| 389 | std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() { | ||
| 390 | return std::unique_lock{mutex}; | ||
| 391 | } | ||
| 392 | |||
| 393 | template <class P> | ||
| 394 | const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { | 386 | const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { |
| 395 | return slot_image_views[id]; | 387 | return slot_image_views[id]; |
| 396 | } | 388 | } |
| @@ -598,11 +590,11 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | |||
| 598 | }); | 590 | }); |
| 599 | for (const ImageId image_id : images) { | 591 | for (const ImageId image_id : images) { |
| 600 | Image& image = slot_images[image_id]; | 592 | Image& image = slot_images[image_id]; |
| 601 | auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); | 593 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); |
| 602 | const auto copies = FullDownloadCopies(image.info); | 594 | const auto copies = FullDownloadCopies(image.info); |
| 603 | image.DownloadMemory(map, 0, copies); | 595 | image.DownloadMemory(map, 0, copies); |
| 604 | runtime.Finish(); | 596 | runtime.Finish(); |
| 605 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); | 597 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); |
| 606 | } | 598 | } |
| 607 | } | 599 | } |
| 608 | 600 | ||
| @@ -757,7 +749,7 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 757 | for (const ImageId image_id : download_ids) { | 749 | for (const ImageId image_id : download_ids) { |
| 758 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 750 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; |
| 759 | } | 751 | } |
| 760 | auto download_map = runtime.MapDownloadBuffer(total_size_bytes); | 752 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); |
| 761 | size_t buffer_offset = 0; | 753 | size_t buffer_offset = 0; |
| 762 | for (const ImageId image_id : download_ids) { | 754 | for (const ImageId image_id : download_ids) { |
| 763 | Image& image = slot_images[image_id]; | 755 | Image& image = slot_images[image_id]; |
| @@ -769,7 +761,7 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 769 | runtime.Finish(); | 761 | runtime.Finish(); |
| 770 | 762 | ||
| 771 | buffer_offset = 0; | 763 | buffer_offset = 0; |
| 772 | const std::span<u8> download_span = download_map.Span(); | 764 | const std::span<u8> download_span = download_map.mapped_span; |
| 773 | for (const ImageId image_id : download_ids) { | 765 | for (const ImageId image_id : download_ids) { |
| 774 | const ImageBase& image = slot_images[image_id]; | 766 | const ImageBase& image = slot_images[image_id]; |
| 775 | const auto copies = FullDownloadCopies(image.info); | 767 | const auto copies = FullDownloadCopies(image.info); |
| @@ -806,7 +798,7 @@ void TextureCache<P>::RefreshContents(Image& image) { | |||
| 806 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); | 798 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); |
| 807 | return; | 799 | return; |
| 808 | } | 800 | } |
| 809 | auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); | 801 | auto map = runtime.UploadStagingBuffer(MapSizeBytes(image)); |
| 810 | UploadImageContents(image, map, 0); | 802 | UploadImageContents(image, map, 0); |
| 811 | runtime.InsertUploadMemoryBarrier(); | 803 | runtime.InsertUploadMemoryBarrier(); |
| 812 | } | 804 | } |
| @@ -814,7 +806,7 @@ void TextureCache<P>::RefreshContents(Image& image) { | |||
| 814 | template <class P> | 806 | template <class P> |
| 815 | template <typename MapBuffer> | 807 | template <typename MapBuffer> |
| 816 | void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { | 808 | void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { |
| 817 | const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); | 809 | const std::span<u8> mapped_span = map.mapped_span.subspan(buffer_offset); |
| 818 | const GPUVAddr gpu_addr = image.gpu_addr; | 810 | const GPUVAddr gpu_addr = image.gpu_addr; |
| 819 | 811 | ||
| 820 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | 812 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { |
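Note on the texture_cache.h hunks above: AcquireLock() is removed and the mutex becomes a public member, so callers are expected to lock it directly. An assumed call-site pattern (the caller side is not part of this hunk):

std::scoped_lock lock{texture_cache.mutex};
// ... texture cache operations that previously ran under AcquireLock() ...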
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index 86393310a..d1ce29450 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h | |||
| @@ -78,7 +78,7 @@ public: | |||
| 78 | * | 78 | * |
| 79 | * @throw vk::Exception on failure | 79 | * @throw vk::Exception on failure |
| 80 | */ | 80 | */ |
| 81 | explicit MemoryAllocator(const Device& device_, bool export_allocations_ = false); | 81 | explicit MemoryAllocator(const Device& device_, bool export_allocations_); |
| 82 | ~MemoryAllocator(); | 82 | ~MemoryAllocator(); |
| 83 | 83 | ||
| 84 | MemoryAllocator& operator=(const MemoryAllocator&) = delete; | 84 | MemoryAllocator& operator=(const MemoryAllocator&) = delete; |
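Note on the vulkan_memory_allocator.h hunk above: removing the default argument means every MemoryAllocator construction site must now state whether allocations are exported. For instance, a non-exporting user would write:

MemoryAllocator memory_allocator(device, /*export_allocations_=*/false);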