summaryrefslogtreecommitdiff
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2021-01-16 20:48:58 -0300
committerGravatar ReinUsesLisp2021-02-13 02:17:22 -0300
commit82c2601555b59a94d7160f2fd686cb63d32dd423 (patch)
treecd0ecd865945452fa589b572de614fc487f2f96a /src/video_core/renderer_vulkan
parentvulkan_common: Expose interop and headless devices (diff)
downloadyuzu-82c2601555b59a94d7160f2fd686cb63d32dd423.tar.gz
yuzu-82c2601555b59a94d7160f2fd686cb63d32dd423.tar.xz
yuzu-82c2601555b59a94d7160f2fd686cb63d32dd423.zip
video_core: Reimplement the buffer cache
Reimplement the buffer cache using cached bindings and page level granularity for modification tracking. This also drops the usage of shared pointers and virtual functions from the cache. - Bindings are cached, allowing to skip work when the game changes few bits between draws. - OpenGL Assembly shaders no longer copy when a region has been modified from the GPU to emulate constant buffers, instead GL_EXT_memory_object is used to alias sub-buffers within the same allocation. - OpenGL Assembly shaders stream constant buffer data using glProgramBufferParametersIuivNV, from NV_parameter_buffer_object. In theory this should save one hash table resolve inside the driver compared to glBufferSubData. - A new OpenGL stream buffer is implemented based on fences for drivers that are not Nvidia's proprietary, due to their low performance on partial glBufferSubData calls synchronized with 3D rendering (that some games use a lot). - Most optimizations are shared between APIs now, allowing Vulkan to cache more bindings than before, skipping unnecessary work. This commit adds the necessary infrastructure to use Vulkan objects from OpenGL. Overall, it improves performance and fixes some bugs present on the old cache. There are still some edge cases hit by some games that harm performance on some vendors; these are planned to be fixed in later commits.
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp6
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h2
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp2
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp9
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp366
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h107
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp97
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h24
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp664
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h64
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp14
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h26
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h20
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp9
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp131
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h26
20 files changed, 585 insertions, 1003 deletions
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 85121d9fd..19aaf034f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
531 return {}; 531 return {};
532} 532}
533 533
534VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { 534VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
535 switch (index_format) { 535 switch (index_format) {
536 case Maxwell::IndexFormat::UnsignedByte: 536 case Maxwell::IndexFormat::UnsignedByte:
537 if (!device.IsExtIndexTypeUint8Supported()) {
538 UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
539 return VK_INDEX_TYPE_UINT16;
540 }
541 return VK_INDEX_TYPE_UINT8_EXT; 537 return VK_INDEX_TYPE_UINT8_EXT;
542 case Maxwell::IndexFormat::UnsignedShort: 538 case Maxwell::IndexFormat::UnsignedShort:
543 return VK_INDEX_TYPE_UINT16; 539 return VK_INDEX_TYPE_UINT16;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7c34b47dc..e3e06ba38 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
53 53
54VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); 54VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
55 55
56VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); 56VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
57 57
58VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); 58VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
59 59
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 6909576cb..1cc720ddd 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -107,7 +107,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
107 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), 107 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
108 surface(CreateSurface(instance, render_window)), 108 surface(CreateSurface(instance, render_window)),
109 device(CreateDevice(instance, dld, *surface)), 109 device(CreateDevice(instance, dld, *surface)),
110 memory_allocator(device), 110 memory_allocator(device, false),
111 state_tracker(gpu), 111 state_tracker(gpu),
112 scheduler(device, state_tracker), 112 scheduler(device, state_tracker),
113 swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, 113 swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 1efaf3b77..72071316c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -58,12 +58,11 @@ private:
58 vk::InstanceDispatch dld; 58 vk::InstanceDispatch dld;
59 59
60 vk::Instance instance; 60 vk::Instance instance;
61 61 vk::DebugUtilsMessenger debug_callback;
62 vk::SurfaceKHR surface; 62 vk::SurfaceKHR surface;
63 63
64 VKScreenInfo screen_info; 64 VKScreenInfo screen_info;
65 65
66 vk::DebugUtilsMessenger debug_callback;
67 Device device; 66 Device device;
68 MemoryAllocator memory_allocator; 67 MemoryAllocator memory_allocator;
69 StateTracker state_tracker; 68 StateTracker state_tracker;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index df8992528..a1a32aabe 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -148,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
148 SetUniformData(data, framebuffer); 148 SetUniformData(data, framebuffer);
149 SetVertexData(data, framebuffer); 149 SetVertexData(data, framebuffer);
150 150
151 const std::span<u8> map = buffer_commit.Map(); 151 const std::span<u8> mapped_span = buffer_commit.Map();
152 std::memcpy(map.data(), &data, sizeof(data)); 152 std::memcpy(mapped_span.data(), &data, sizeof(data));
153 153
154 if (!use_accelerated) { 154 if (!use_accelerated) {
155 const u64 image_offset = GetRawImageOffset(framebuffer, image_index); 155 const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
@@ -162,8 +162,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
162 constexpr u32 block_height_log2 = 4; 162 constexpr u32 block_height_log2 = 4;
163 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); 163 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
164 Tegra::Texture::UnswizzleTexture( 164 Tegra::Texture::UnswizzleTexture(
165 map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, 165 mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
166 framebuffer.width, framebuffer.height, 1, block_height_log2, 0); 166 bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
167 167
168 const VkBufferImageCopy copy{ 168 const VkBufferImageCopy copy{
169 .bufferOffset = image_offset, 169 .bufferOffset = image_offset,
@@ -263,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
263 cmdbuf.Draw(4, 1, 0, 0); 263 cmdbuf.Draw(4, 1, 0, 0);
264 cmdbuf.EndRenderPass(); 264 cmdbuf.EndRenderPass();
265 }); 265 });
266
267 return *semaphores[image_index]; 266 return *semaphores[image_index];
268} 267}
269 268
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d8ad40a0f..48fc5d966 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -3,188 +3,276 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array>
6#include <cstring> 7#include <cstring>
7#include <memory> 8#include <span>
9#include <vector>
8 10
9#include "core/core.h"
10#include "video_core/buffer_cache/buffer_cache.h" 11#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_buffer_cache.h" 13#include "video_core/renderer_vulkan/vk_buffer_cache.h"
12#include "video_core/renderer_vulkan/vk_scheduler.h" 14#include "video_core/renderer_vulkan/vk_scheduler.h"
13#include "video_core/renderer_vulkan/vk_stream_buffer.h" 15#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
16#include "video_core/renderer_vulkan/vk_update_descriptor.h"
14#include "video_core/vulkan_common/vulkan_device.h" 17#include "video_core/vulkan_common/vulkan_device.h"
18#include "video_core/vulkan_common/vulkan_memory_allocator.h"
15#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
16 20
17namespace Vulkan { 21namespace Vulkan {
18
19namespace { 22namespace {
23VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
24 return VkBufferCopy{
25 .srcOffset = copy.src_offset,
26 .dstOffset = copy.dst_offset,
27 .size = copy.size,
28 };
29}
20 30
21constexpr VkBufferUsageFlags BUFFER_USAGE = 31VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
22 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | 32 if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
23 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; 33 return VK_INDEX_TYPE_UINT8_EXT;
24 34 }
25constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = 35 if (num_elements <= 0xffff) {
26 VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | 36 return VK_INDEX_TYPE_UINT16;
27 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 37 }
28 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; 38 return VK_INDEX_TYPE_UINT32;
29 39}
30constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
31 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
32 VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
33 40
34constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = 41size_t BytesPerIndex(VkIndexType index_type) {
35 VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; 42 switch (index_type) {
43 case VK_INDEX_TYPE_UINT8_EXT:
44 return 1;
45 case VK_INDEX_TYPE_UINT16:
46 return 2;
47 case VK_INDEX_TYPE_UINT32:
48 return 4;
49 default:
50 UNREACHABLE_MSG("Invalid index type={}", index_type);
51 return 1;
52 }
53}
36 54
55template <typename T>
56std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
57 std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
58 std::ranges::transform(indices, indices.begin(),
59 [quad, first](u32 index) { return first + index + quad * 4; });
60 return indices;
61}
37} // Anonymous namespace 62} // Anonymous namespace
38 63
39Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_, 64Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
40 StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) 65 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
41 : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ 66
42 staging_pool_} { 67Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
43 buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ 68 VAddr cpu_addr_, u64 size_bytes_)
69 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
70 buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
44 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 71 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
45 .pNext = nullptr, 72 .pNext = nullptr,
46 .flags = 0, 73 .flags = 0,
47 .size = static_cast<VkDeviceSize>(size_), 74 .size = SizeBytes(),
48 .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 75 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
76 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
77 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
78 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
79 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
49 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 80 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
50 .queueFamilyIndexCount = 0, 81 .queueFamilyIndexCount = 0,
51 .pQueueFamilyIndices = nullptr, 82 .pQueueFamilyIndices = nullptr,
52 }); 83 });
53 commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); 84 if (runtime.device.HasDebuggingToolAttached()) {
85 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
86 }
87 commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
54} 88}
55 89
56Buffer::~Buffer() = default; 90BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
91 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
92 VKUpdateDescriptorQueue& update_descriptor_queue_,
93 VKDescriptorPool& descriptor_pool)
94 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
95 staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
96 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
97 quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
57 98
58void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { 99StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
59 const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload); 100 return staging_pool.Request(size, MemoryUsage::Upload);
60 std::memcpy(staging.mapped_span.data(), data, data_size); 101}
61 102
62 scheduler.RequestOutsideRenderPassOperationContext(); 103StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
104 return staging_pool.Request(size, MemoryUsage::Download);
105}
63 106
64 const VkBuffer handle = Handle(); 107void BufferCacheRuntime::Finish() {
65 scheduler.Record([staging = staging.buffer, handle, offset, data_size, 108 scheduler.Finish();
66 &device = device](vk::CommandBuffer cmdbuf) { 109}
67 const VkBufferMemoryBarrier read_barrier{ 110
68 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 111void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
69 .pNext = nullptr, 112 std::span<const VideoCommon::BufferCopy> copies) {
70 .srcAccessMask = 113 static constexpr VkMemoryBarrier READ_BARRIER{
71 VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | 114 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
72 VK_ACCESS_HOST_WRITE_BIT | 115 .pNext = nullptr,
73 (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), 116 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
74 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, 117 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
75 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 118 };
76 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 119 static constexpr VkMemoryBarrier WRITE_BARRIER{
77 .buffer = handle, 120 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
78 .offset = offset, 121 .pNext = nullptr,
79 .size = data_size, 122 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
80 }; 123 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
81 const VkBufferMemoryBarrier write_barrier{ 124 };
82 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 125 // Measuring a popular game, this number never exceeds the specified size once data is warmed up
83 .pNext = nullptr, 126 boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
84 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 127 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
85 .dstAccessMask = UPLOAD_ACCESS_BARRIERS, 128 scheduler.RequestOutsideRenderPassOperationContext();
86 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 129 scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
87 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
88 .buffer = handle,
89 .offset = offset,
90 .size = data_size,
91 };
92 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 130 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
93 0, read_barrier); 131 0, READ_BARRIER);
94 cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); 132 cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
95 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, 133 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
96 write_barrier); 134 0, WRITE_BARRIER);
97 }); 135 });
98} 136}
99 137
100void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { 138void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
101 auto staging = staging_pool.Request(data_size, MemoryUsage::Download); 139 u32 base_vertex, u32 num_indices, VkBuffer buffer,
102 scheduler.RequestOutsideRenderPassOperationContext(); 140 u32 offset, [[maybe_unused]] u32 size) {
141 VkIndexType index_type = MaxwellToVK::IndexFormat(index_format);
142 if (topology == PrimitiveTopology::Quads) {
143 index_type = VK_INDEX_TYPE_UINT32;
144 std::tie(buffer, offset) =
145 quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
146 } else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
147 index_type = VK_INDEX_TYPE_UINT16;
148 std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset);
149 }
150 scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) {
151 cmdbuf.BindIndexBuffer(buffer, offset, index_type);
152 });
153}
103 154
104 const VkBuffer handle = Handle(); 155void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
105 scheduler.Record( 156 ReserveQuadArrayLUT(first + count, true);
106 [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
107 const VkBufferMemoryBarrier barrier{
108 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
109 .pNext = nullptr,
110 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
111 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
112 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
113 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
114 .buffer = handle,
115 .offset = offset,
116 .size = data_size,
117 };
118
119 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
120 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
121 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
122 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
123 cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
124 });
125 scheduler.Finish();
126 157
127 std::memcpy(data, staging.mapped_span.data(), data_size); 158 // The LUT has the indices 0, 1, 2, and 3 copied as an array
159 // To apply these 'first' offsets we can apply an offset based on the modulus.
160 const VkIndexType index_type = quad_array_lut_index_type;
161 const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
162 const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
163 scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
164 cmdbuf.BindIndexBuffer(buffer, offset, index_type);
165 });
128} 166}
129 167
130void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 168void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
131 std::size_t copy_size) { 169 u32 stride) {
132 scheduler.RequestOutsideRenderPassOperationContext(); 170 if (device.IsExtExtendedDynamicStateSupported()) {
171 scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
172 const VkDeviceSize vk_offset = offset;
173 const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
174 const VkDeviceSize vk_stride = stride;
175 cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
176 });
177 } else {
178 scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
179 cmdbuf.BindVertexBuffer(index, buffer, offset);
180 });
181 }
182}
133 183
134 const VkBuffer dst_buffer = Handle(); 184void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
135 scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, 185 u32 size) {
136 copy_size](vk::CommandBuffer cmdbuf) { 186 if (!device.IsExtTransformFeedbackSupported()) {
137 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size}); 187 // Already logged in the rasterizer
138 188 return;
139 std::array<VkBufferMemoryBarrier, 2> barriers; 189 }
140 barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 190 scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
141 barriers[0].pNext = nullptr; 191 const VkDeviceSize vk_offset = offset;
142 barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; 192 const VkDeviceSize vk_size = size;
143 barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; 193 cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
144 barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
145 barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
146 barriers[0].buffer = src_buffer;
147 barriers[0].offset = src_offset;
148 barriers[0].size = copy_size;
149 barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
150 barriers[1].pNext = nullptr;
151 barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
152 barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
153 barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
154 barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
155 barriers[1].buffer = dst_buffer;
156 barriers[1].offset = dst_offset;
157 barriers[1].size = copy_size;
158 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
159 barriers, {});
160 }); 194 });
161} 195}
162 196
163VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, 197void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
164 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 198 update_descriptor_queue.AddBuffer(buffer, offset, size);
165 const Device& device_, MemoryAllocator& memory_allocator_,
166 VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
167 StagingBufferPool& staging_pool_)
168 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
169 cpu_memory_, stream_buffer_},
170 device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
171 staging_pool{staging_pool_} {}
172
173VKBufferCache::~VKBufferCache() = default;
174
175std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
176 return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr,
177 size);
178} 199}
179 200
180VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { 201void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
181 size = std::max(size, std::size_t(4)); 202 if (num_indices <= current_num_indices) {
182 const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal); 203 return;
204 }
205 if (wait_for_idle) {
206 scheduler.Finish();
207 }
208 current_num_indices = num_indices;
209 quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
210
211 const u32 num_quads = num_indices / 4;
212 const u32 num_triangle_indices = num_quads * 6;
213 const u32 num_first_offset_copies = 4;
214 const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
215 const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
216 quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
217 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
218 .pNext = nullptr,
219 .flags = 0,
220 .size = size_bytes,
221 .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
222 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
223 .queueFamilyIndexCount = 0,
224 .pQueueFamilyIndices = nullptr,
225 });
226 if (device.HasDebuggingToolAttached()) {
227 quad_array_lut.SetObjectNameEXT("Quad LUT");
228 }
229 quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
230
231 const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
232 u8* staging_data = staging.mapped_span.data();
233 const size_t quad_size = bytes_per_index * 6;
234 for (u32 first = 0; first < num_first_offset_copies; ++first) {
235 for (u32 quad = 0; quad < num_quads; ++quad) {
236 switch (quad_array_lut_index_type) {
237 case VK_INDEX_TYPE_UINT8_EXT:
238 std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
239 break;
240 case VK_INDEX_TYPE_UINT16:
241 std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
242 break;
243 case VK_INDEX_TYPE_UINT32:
244 std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
245 break;
246 default:
247 UNREACHABLE();
248 break;
249 }
250 staging_data += quad_size;
251 }
252 }
183 scheduler.RequestOutsideRenderPassOperationContext(); 253 scheduler.RequestOutsideRenderPassOperationContext();
184 scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) { 254 scheduler.Record([src_buffer = staging.buffer, dst_buffer = *quad_array_lut,
185 cmdbuf.FillBuffer(buffer, 0, size, 0); 255 size_bytes](vk::CommandBuffer cmdbuf) {
256 const VkBufferCopy copy{
257 .srcOffset = 0,
258 .dstOffset = 0,
259 .size = size_bytes,
260 };
261 const VkBufferMemoryBarrier write_barrier{
262 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
263 .pNext = nullptr,
264 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
265 .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
266 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
267 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
268 .buffer = dst_buffer,
269 .offset = 0,
270 .size = size_bytes,
271 };
272 cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
273 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
274 0, write_barrier);
186 }); 275 });
187 return {empty.buffer, 0, 0};
188} 276}
189 277
190} // namespace Vulkan 278} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 41d577510..d232e1f2d 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -4,69 +4,112 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
8
9#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h" 7#include "video_core/buffer_cache/buffer_cache.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/vk_compute_pass.h"
11#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
12#include "video_core/renderer_vulkan/vk_stream_buffer.h"
13#include "video_core/vulkan_common/vulkan_memory_allocator.h" 11#include "video_core/vulkan_common/vulkan_memory_allocator.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 12#include "video_core/vulkan_common/vulkan_wrapper.h"
15 13
16namespace Vulkan { 14namespace Vulkan {
17 15
18class Device; 16class Device;
17class VKDescriptorPool;
19class VKScheduler; 18class VKScheduler;
19class VKUpdateDescriptorQueue;
20 20
21class Buffer final : public VideoCommon::BufferBlock { 21class BufferCacheRuntime;
22public:
23 explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
24 StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
25 ~Buffer();
26
27 void Upload(std::size_t offset, std::size_t data_size, const u8* data);
28
29 void Download(std::size_t offset, std::size_t data_size, u8* data);
30 22
31 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 23class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
32 std::size_t copy_size); 24public:
25 explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
26 explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
27 VAddr cpu_addr_, u64 size_bytes_);
33 28
34 VkBuffer Handle() const { 29 [[nodiscard]] VkBuffer Handle() const noexcept {
35 return *buffer; 30 return *buffer;
36 } 31 }
37 32
38 u64 Address() const { 33 operator VkBuffer() const noexcept {
39 return 0; 34 return *buffer;
40 } 35 }
41 36
42private: 37private:
43 const Device& device;
44 VKScheduler& scheduler;
45 StagingBufferPool& staging_pool;
46
47 vk::Buffer buffer; 38 vk::Buffer buffer;
48 MemoryCommit commit; 39 MemoryCommit commit;
49}; 40};
50 41
51class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { 42class BufferCacheRuntime {
43 friend Buffer;
44
45 using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
46 using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
47
52public: 48public:
53 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, 49 explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
54 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, 50 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
55 const Device& device, MemoryAllocator& memory_allocator, 51 VKUpdateDescriptorQueue& update_descriptor_queue_,
56 VKScheduler& scheduler, VKStreamBuffer& stream_buffer, 52 VKDescriptorPool& descriptor_pool);
57 StagingBufferPool& staging_pool); 53
58 ~VKBufferCache(); 54 void Finish();
55
56 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
57
58 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
59 59
60 BufferInfo GetEmptyBuffer(std::size_t size) override; 60 void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
61 std::span<const VideoCommon::BufferCopy> copies);
61 62
62protected: 63 void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
63 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; 64 u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
65
66 void BindQuadArrayIndexBuffer(u32 first, u32 count);
67
68 void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
69
70 void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
71
72 void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
73 BindBuffer(buffer, offset, size);
74 }
75
76 void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
77 [[maybe_unused]] bool is_written) {
78 BindBuffer(buffer, offset, size);
79 }
64 80
65private: 81private:
82 void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
83
84 void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
85
66 const Device& device; 86 const Device& device;
67 MemoryAllocator& memory_allocator; 87 MemoryAllocator& memory_allocator;
68 VKScheduler& scheduler; 88 VKScheduler& scheduler;
69 StagingBufferPool& staging_pool; 89 StagingBufferPool& staging_pool;
90 VKUpdateDescriptorQueue& update_descriptor_queue;
91
92 vk::Buffer quad_array_lut;
93 MemoryCommit quad_array_lut_commit;
94 VkIndexType quad_array_lut_index_type{};
95 u32 current_num_indices = 0;
96
97 Uint8Pass uint8_pass;
98 QuadIndexedPass quad_index_pass;
70}; 99};
71 100
101struct BufferCacheParams {
102 using Runtime = Vulkan::BufferCacheRuntime;
103 using Buffer = Vulkan::Buffer;
104
105 static constexpr bool IS_OPENGL = false;
106 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
107 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
108 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
109 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
110 static constexpr bool USE_MEMORY_MAPS = true;
111};
112
113using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
114
72} // namespace Vulkan 115} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 5eb6a54be..a4fdcdf81 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,7 +10,6 @@
10#include "common/alignment.h" 10#include "common/alignment.h"
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
14#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" 13#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
15#include "video_core/host_shaders/vulkan_uint8_comp_spv.h" 14#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
16#include "video_core/renderer_vulkan/vk_compute_pass.h" 15#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -22,30 +21,7 @@
22#include "video_core/vulkan_common/vulkan_wrapper.h" 21#include "video_core/vulkan_common/vulkan_wrapper.h"
23 22
24namespace Vulkan { 23namespace Vulkan {
25
26namespace { 24namespace {
27
28VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
29 return {
30 .binding = 0,
31 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
32 .descriptorCount = 1,
33 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
34 .pImmutableSamplers = nullptr,
35 };
36}
37
38VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
39 return {
40 .dstBinding = 0,
41 .dstArrayElement = 0,
42 .descriptorCount = 1,
43 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
44 .offset = 0,
45 .stride = sizeof(DescriptorUpdateEntry),
46 };
47}
48
49VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { 25VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
50 return { 26 return {
51 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 27 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -162,55 +138,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
162 return set; 138 return set;
163} 139}
164 140
165QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
166 VKDescriptorPool& descriptor_pool_,
167 StagingBufferPool& staging_buffer_pool_,
168 VKUpdateDescriptorQueue& update_descriptor_queue_)
169 : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
170 BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
171 BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
172 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
173 update_descriptor_queue{update_descriptor_queue_} {}
174
175QuadArrayPass::~QuadArrayPass() = default;
176
177std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
178 const u32 num_triangle_vertices = (num_vertices / 4) * 6;
179 const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
180 const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
181
182 update_descriptor_queue.Acquire();
183 update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
184 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
185
186 scheduler.RequestOutsideRenderPassOperationContext();
187
188 ASSERT(num_vertices % 4 == 0);
189 const u32 num_quads = num_vertices / 4;
190 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer,
191 num_quads, first, set](vk::CommandBuffer cmdbuf) {
192 constexpr u32 dispatch_size = 1024;
193 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
194 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
195 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
196 cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
197
198 VkBufferMemoryBarrier barrier;
199 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
200 barrier.pNext = nullptr;
201 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
202 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
203 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
204 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
205 barrier.buffer = buffer;
206 barrier.offset = 0;
207 barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
208 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
209 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
210 });
211 return {staging_ref.buffer, 0};
212}
213
214Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, 141Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
215 VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, 142 VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
216 VKUpdateDescriptorQueue& update_descriptor_queue_) 143 VKUpdateDescriptorQueue& update_descriptor_queue_)
@@ -221,18 +148,18 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
221 148
222Uint8Pass::~Uint8Pass() = default; 149Uint8Pass::~Uint8Pass() = default;
223 150
224std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, 151std::pair<VkBuffer, u32> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
225 u64 src_offset) { 152 u32 src_offset) {
226 const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); 153 const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
227 const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); 154 const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
228 155
229 update_descriptor_queue.Acquire(); 156 update_descriptor_queue.Acquire();
230 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); 157 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
231 update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); 158 update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
232 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 159 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
233 160
234 scheduler.RequestOutsideRenderPassOperationContext(); 161 scheduler.RequestOutsideRenderPassOperationContext();
235 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, 162 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
236 num_vertices](vk::CommandBuffer cmdbuf) { 163 num_vertices](vk::CommandBuffer cmdbuf) {
237 constexpr u32 dispatch_size = 1024; 164 constexpr u32 dispatch_size = 1024;
238 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 165 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
@@ -252,7 +179,7 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
252 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 179 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
253 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); 180 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
254 }); 181 });
255 return {staging_ref.buffer, 0}; 182 return {staging.buffer, 0};
256} 183}
257 184
258QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, 185QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@@ -267,9 +194,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
267 194
268QuadIndexedPass::~QuadIndexedPass() = default; 195QuadIndexedPass::~QuadIndexedPass() = default;
269 196
270std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( 197std::pair<VkBuffer, u32> QuadIndexedPass::Assemble(
271 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, 198 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
272 VkBuffer src_buffer, u64 src_offset) { 199 VkBuffer src_buffer, u32 src_offset) {
273 const u32 index_shift = [index_format] { 200 const u32 index_shift = [index_format] {
274 switch (index_format) { 201 switch (index_format) {
275 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: 202 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -286,15 +213,15 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
286 const u32 num_tri_vertices = (num_vertices / 4) * 6; 213 const u32 num_tri_vertices = (num_vertices / 4) * 6;
287 214
288 const std::size_t staging_size = num_tri_vertices * sizeof(u32); 215 const std::size_t staging_size = num_tri_vertices * sizeof(u32);
289 const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); 216 const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
290 217
291 update_descriptor_queue.Acquire(); 218 update_descriptor_queue.Acquire();
292 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); 219 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
293 update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); 220 update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
294 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 221 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
295 222
296 scheduler.RequestOutsideRenderPassOperationContext(); 223 scheduler.RequestOutsideRenderPassOperationContext();
297 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, 224 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
298 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { 225 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
299 static constexpr u32 dispatch_size = 1024; 226 static constexpr u32 dispatch_size = 1024;
300 const std::array push_constants = {base_vertex, index_shift}; 227 const std::array push_constants = {base_vertex, index_shift};
@@ -317,7 +244,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
317 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 244 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
318 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); 245 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
319 }); 246 });
320 return {staging_ref.buffer, 0}; 247 return {staging.buffer, 0};
321} 248}
322 249
323} // namespace Vulkan 250} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index f5c6f5f17..4904019f5 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -41,22 +41,6 @@ private:
41 vk::ShaderModule module; 41 vk::ShaderModule module;
42}; 42};
43 43
44class QuadArrayPass final : public VKComputePass {
45public:
46 explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
47 VKDescriptorPool& descriptor_pool_,
48 StagingBufferPool& staging_buffer_pool_,
49 VKUpdateDescriptorQueue& update_descriptor_queue_);
50 ~QuadArrayPass();
51
52 std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
53
54private:
55 VKScheduler& scheduler;
56 StagingBufferPool& staging_buffer_pool;
57 VKUpdateDescriptorQueue& update_descriptor_queue;
58};
59
60class Uint8Pass final : public VKComputePass { 44class Uint8Pass final : public VKComputePass {
61public: 45public:
62 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, 46 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
@@ -64,7 +48,9 @@ public:
64 VKUpdateDescriptorQueue& update_descriptor_queue_); 48 VKUpdateDescriptorQueue& update_descriptor_queue_);
65 ~Uint8Pass(); 49 ~Uint8Pass();
66 50
67 std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); 51 /// Assemble uint8 indices into an uint16 index buffer
52 /// Returns a pair with the staging buffer, and the offset where the assembled data is
53 std::pair<VkBuffer, u32> Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset);
68 54
69private: 55private:
70 VKScheduler& scheduler; 56 VKScheduler& scheduler;
@@ -80,9 +66,9 @@ public:
80 VKUpdateDescriptorQueue& update_descriptor_queue_); 66 VKUpdateDescriptorQueue& update_descriptor_queue_);
81 ~QuadIndexedPass(); 67 ~QuadIndexedPass();
82 68
83 std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, 69 std::pair<VkBuffer, u32> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
84 u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, 70 u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
85 u64 src_offset); 71 u32 src_offset);
86 72
87private: 73private:
88 VKScheduler& scheduler; 74 VKScheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 6cd00884d..3bec48d14 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -45,8 +45,8 @@ void InnerFence::Wait() {
45} 45}
46 46
47VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 47VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
48 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, 48 TextureCache& texture_cache_, BufferCache& buffer_cache_,
49 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, 49 VKQueryCache& query_cache_, const Device& device_,
50 VKScheduler& scheduler_) 50 VKScheduler& scheduler_)
51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, 51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
52 scheduler{scheduler_} {} 52 scheduler{scheduler_} {}
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 9c5e5aa8f..2f8322d29 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -22,7 +22,6 @@ class RasterizerInterface;
22namespace Vulkan { 22namespace Vulkan {
23 23
24class Device; 24class Device;
25class VKBufferCache;
26class VKQueryCache; 25class VKQueryCache;
27class VKScheduler; 26class VKScheduler;
28 27
@@ -45,14 +44,14 @@ private:
45using Fence = std::shared_ptr<InnerFence>; 44using Fence = std::shared_ptr<InnerFence>;
46 45
47using GenericFenceManager = 46using GenericFenceManager =
48 VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>; 47 VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
49 48
50class VKFenceManager final : public GenericFenceManager { 49class VKFenceManager final : public GenericFenceManager {
51public: 50public:
52 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 51 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
53 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, 52 TextureCache& texture_cache, BufferCache& buffer_cache,
54 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, 53 VKQueryCache& query_cache, const Device& device,
55 VKScheduler& scheduler_); 54 VKScheduler& scheduler);
56 55
57protected: 56protected:
58 Fence CreateFence(u32 value, bool is_stubbed) override; 57 Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f0a111829..684d4e3a6 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -8,8 +8,6 @@
8#include <mutex> 8#include <mutex>
9#include <vector> 9#include <vector>
10 10
11#include <boost/container/static_vector.hpp>
12
13#include "common/alignment.h" 11#include "common/alignment.h"
14#include "common/assert.h" 12#include "common/assert.h"
15#include "common/logging/log.h" 13#include "common/logging/log.h"
@@ -24,7 +22,6 @@
24#include "video_core/renderer_vulkan/maxwell_to_vk.h" 22#include "video_core/renderer_vulkan/maxwell_to_vk.h"
25#include "video_core/renderer_vulkan/renderer_vulkan.h" 23#include "video_core/renderer_vulkan/renderer_vulkan.h"
26#include "video_core/renderer_vulkan/vk_buffer_cache.h" 24#include "video_core/renderer_vulkan/vk_buffer_cache.h"
27#include "video_core/renderer_vulkan/vk_compute_pass.h"
28#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 25#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
29#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 26#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
30#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 27#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25
50MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); 47MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
51MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); 48MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
52MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); 49MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
53MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
54MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
55MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
56MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
57MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
58MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
59MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); 50MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
60 51
61namespace { 52namespace {
53struct DrawParams {
54 u32 base_instance;
55 u32 num_instances;
56 u32 base_vertex;
57 u32 num_vertices;
58 bool is_indexed;
59};
62 60
63constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); 61constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
64 62
@@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
67 const float width = src.scale_x * 2.0f; 65 const float width = src.scale_x * 2.0f;
68 const float height = src.scale_y * 2.0f; 66 const float height = src.scale_y * 2.0f;
69 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; 67 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
70
71 VkViewport viewport{ 68 VkViewport viewport{
72 .x = src.translate_x - src.scale_x, 69 .x = src.translate_x - src.scale_x,
73 .y = src.translate_y - src.scale_y, 70 .y = src.translate_y - src.scale_y,
@@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
76 .minDepth = src.translate_z - src.scale_z * reduce_z, 73 .minDepth = src.translate_z - src.scale_z * reduce_z,
77 .maxDepth = src.translate_z + src.scale_z, 74 .maxDepth = src.translate_z + src.scale_z,
78 }; 75 };
79
80 if (!device.IsExtDepthRangeUnrestrictedSupported()) { 76 if (!device.IsExtDepthRangeUnrestrictedSupported()) {
81 viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); 77 viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
82 viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); 78 viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
83 } 79 }
84
85 return viewport; 80 return viewport;
86} 81}
87 82
@@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
146 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); 141 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
147} 142}
148 143
149template <size_t N>
150std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
151 std::array<VkDeviceSize, N> expanded;
152 std::copy(strides.begin(), strides.end(), expanded.begin());
153 return expanded;
154}
155
156ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { 144ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
157 if (entry.is_buffer) { 145 if (entry.is_buffer) {
158 return ImageViewType::e2D; 146 return ImageViewType::e2D;
@@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca
221 } 209 }
222} 210}
223 211
224} // Anonymous namespace 212DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
225 213 bool is_indexed) {
226class BufferBindings final { 214 DrawParams params{
227public: 215 .base_instance = regs.vb_base_instance,
228 void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) { 216 .num_instances = is_instanced ? num_instances : 1,
229 vertex.buffers[vertex.num_buffers] = buffer; 217 .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
230 vertex.offsets[vertex.num_buffers] = offset; 218 .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
231 vertex.sizes[vertex.num_buffers] = size; 219 .is_indexed = is_indexed,
232 vertex.strides[vertex.num_buffers] = static_cast<u16>(stride); 220 };
233 ++vertex.num_buffers; 221 if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
234 } 222 // 6 triangle vertices per quad, base vertex is part of the index
235 223 // See BindQuadArrayIndexBuffer for more details
236 void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) { 224 params.num_vertices = (params.num_vertices / 4) * 6;
237 index.buffer = buffer; 225 params.base_vertex = 0;
238 index.offset = offset; 226 params.is_indexed = true;
239 index.type = type;
240 }
241
242 void Bind(const Device& device, VKScheduler& scheduler) const {
243 // Use this large switch case to avoid dispatching more memory in the record lambda than
244 // what we need. It looks horrible, but it's the best we can do on standard C++.
245 switch (vertex.num_buffers) {
246 case 0:
247 return BindStatic<0>(device, scheduler);
248 case 1:
249 return BindStatic<1>(device, scheduler);
250 case 2:
251 return BindStatic<2>(device, scheduler);
252 case 3:
253 return BindStatic<3>(device, scheduler);
254 case 4:
255 return BindStatic<4>(device, scheduler);
256 case 5:
257 return BindStatic<5>(device, scheduler);
258 case 6:
259 return BindStatic<6>(device, scheduler);
260 case 7:
261 return BindStatic<7>(device, scheduler);
262 case 8:
263 return BindStatic<8>(device, scheduler);
264 case 9:
265 return BindStatic<9>(device, scheduler);
266 case 10:
267 return BindStatic<10>(device, scheduler);
268 case 11:
269 return BindStatic<11>(device, scheduler);
270 case 12:
271 return BindStatic<12>(device, scheduler);
272 case 13:
273 return BindStatic<13>(device, scheduler);
274 case 14:
275 return BindStatic<14>(device, scheduler);
276 case 15:
277 return BindStatic<15>(device, scheduler);
278 case 16:
279 return BindStatic<16>(device, scheduler);
280 case 17:
281 return BindStatic<17>(device, scheduler);
282 case 18:
283 return BindStatic<18>(device, scheduler);
284 case 19:
285 return BindStatic<19>(device, scheduler);
286 case 20:
287 return BindStatic<20>(device, scheduler);
288 case 21:
289 return BindStatic<21>(device, scheduler);
290 case 22:
291 return BindStatic<22>(device, scheduler);
292 case 23:
293 return BindStatic<23>(device, scheduler);
294 case 24:
295 return BindStatic<24>(device, scheduler);
296 case 25:
297 return BindStatic<25>(device, scheduler);
298 case 26:
299 return BindStatic<26>(device, scheduler);
300 case 27:
301 return BindStatic<27>(device, scheduler);
302 case 28:
303 return BindStatic<28>(device, scheduler);
304 case 29:
305 return BindStatic<29>(device, scheduler);
306 case 30:
307 return BindStatic<30>(device, scheduler);
308 case 31:
309 return BindStatic<31>(device, scheduler);
310 case 32:
311 return BindStatic<32>(device, scheduler);
312 }
313 UNREACHABLE();
314 }
315
316private:
317 // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
318 struct {
319 size_t num_buffers = 0;
320 std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
321 std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
322 std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
323 std::array<u16, Maxwell::NumVertexArrays> strides;
324 } vertex;
325
326 struct {
327 VkBuffer buffer = nullptr;
328 VkDeviceSize offset;
329 VkIndexType type;
330 } index;
331
332 template <size_t N>
333 void BindStatic(const Device& device, VKScheduler& scheduler) const {
334 if (device.IsExtExtendedDynamicStateSupported()) {
335 if (index.buffer) {
336 BindStatic<N, true, true>(scheduler);
337 } else {
338 BindStatic<N, false, true>(scheduler);
339 }
340 } else {
341 if (index.buffer) {
342 BindStatic<N, true, false>(scheduler);
343 } else {
344 BindStatic<N, false, false>(scheduler);
345 }
346 }
347 }
348
349 template <size_t N, bool is_indexed, bool has_extended_dynamic_state>
350 void BindStatic(VKScheduler& scheduler) const {
351 static_assert(N <= Maxwell::NumVertexArrays);
352 if constexpr (N == 0) {
353 return;
354 }
355
356 std::array<VkBuffer, N> buffers;
357 std::array<VkDeviceSize, N> offsets;
358 std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
359 std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
360
361 if constexpr (has_extended_dynamic_state) {
362 // With extended dynamic states we can specify the length and stride of a vertex buffer
363 std::array<VkDeviceSize, N> sizes;
364 std::array<u16, N> strides;
365 std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin());
366 std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin());
367
368 if constexpr (is_indexed) {
369 scheduler.Record(
370 [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) {
371 cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
372 cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
373 offsets.data(), sizes.data(),
374 ExpandStrides(strides).data());
375 });
376 } else {
377 scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) {
378 cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
379 offsets.data(), sizes.data(),
380 ExpandStrides(strides).data());
381 });
382 }
383 return;
384 }
385
386 if constexpr (is_indexed) {
387 // Indexed draw
388 scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
389 cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
390 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
391 });
392 } else {
393 // Array draw
394 scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) {
395 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
396 });
397 }
398 }
399};
400
401void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
402 if (is_indexed) {
403 cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance);
404 } else {
405 cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance);
406 } 227 }
228 return params;
407} 229}
230} // Anonymous namespace
408 231
409RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 232RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
410 Tegra::MemoryManager& gpu_memory_, 233 Tegra::MemoryManager& gpu_memory_,
@@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
414 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, 237 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
415 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, 238 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
416 screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, 239 screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
417 state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), 240 state_tracker{state_tracker_}, scheduler{scheduler_},
418 staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), 241 staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
419 update_descriptor_queue(device, scheduler), 242 update_descriptor_queue(device, scheduler),
420 blit_image(device, scheduler, state_tracker, descriptor_pool), 243 blit_image(device, scheduler, state_tracker, descriptor_pool),
421 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
422 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
423 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
424 texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, 244 texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image},
425 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 245 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
246 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
247 update_descriptor_queue, descriptor_pool),
248 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
426 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 249 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
427 descriptor_pool, update_descriptor_queue), 250 descriptor_pool, update_descriptor_queue),
428 buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler,
429 stream_buffer, staging_pool),
430 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, 251 query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
431 fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler), 252 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
432 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { 253 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
433 scheduler.SetQueryCache(query_cache); 254 scheduler.SetQueryCache(query_cache);
434 if (device.UseAsynchronousShaders()) { 255 if (device.UseAsynchronousShaders()) {
@@ -449,22 +270,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
449 GraphicsPipelineCacheKey key; 270 GraphicsPipelineCacheKey key;
450 key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); 271 key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
451 272
452 buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); 273 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
453
454 BufferBindings buffer_bindings;
455 const DrawParameters draw_params =
456 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
457 274
458 auto lock = texture_cache.AcquireLock();
459 texture_cache.SynchronizeGraphicsDescriptors(); 275 texture_cache.SynchronizeGraphicsDescriptors();
460
461 texture_cache.UpdateRenderTargets(false); 276 texture_cache.UpdateRenderTargets(false);
462 277
463 const auto shaders = pipeline_cache.GetShaders(); 278 const auto shaders = pipeline_cache.GetShaders();
464 key.shaders = GetShaderAddresses(shaders); 279 key.shaders = GetShaderAddresses(shaders);
465 SetupShaderDescriptors(shaders); 280 SetupShaderDescriptors(shaders, is_indexed);
466
467 buffer_cache.Unmap();
468 281
469 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); 282 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
470 key.renderpass = framebuffer->RenderPass(); 283 key.renderpass = framebuffer->RenderPass();
@@ -476,22 +289,29 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
476 return; 289 return;
477 } 290 }
478 291
479 buffer_bindings.Bind(device, scheduler);
480
481 BeginTransformFeedback(); 292 BeginTransformFeedback();
482 293
483 scheduler.RequestRenderpass(framebuffer); 294 scheduler.RequestRenderpass(framebuffer);
484 scheduler.BindGraphicsPipeline(pipeline->GetHandle()); 295 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
485 UpdateDynamicStates(); 296 UpdateDynamicStates();
486 297
487 const auto pipeline_layout = pipeline->GetLayout(); 298 const auto& regs = maxwell3d.regs;
488 const auto descriptor_set = pipeline->CommitDescriptorSet(); 299 const u32 num_instances = maxwell3d.mme_draw.instance_count;
300 const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
301 const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
302 const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
489 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { 303 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
490 if (descriptor_set) { 304 if (descriptor_set) {
491 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 305 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
492 DESCRIPTOR_SET, descriptor_set, {}); 306 DESCRIPTOR_SET, descriptor_set, nullptr);
307 }
308 if (draw_params.is_indexed) {
309 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
310 draw_params.base_vertex, draw_params.base_instance);
311 } else {
312 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
313 draw_params.base_vertex, draw_params.base_instance);
493 } 314 }
494 draw_params.Draw(cmdbuf);
495 }); 315 });
496 316
497 EndTransformFeedback(); 317 EndTransformFeedback();
@@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() {
515 return; 335 return;
516 } 336 }
517 337
518 auto lock = texture_cache.AcquireLock(); 338 std::scoped_lock lock{texture_cache.mutex};
519 texture_cache.UpdateRenderTargets(true); 339 texture_cache.UpdateRenderTargets(true);
520 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); 340 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
521 const VkExtent2D render_area = framebuffer->RenderArea(); 341 const VkExtent2D render_area = framebuffer->RenderArea();
@@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() {
559 if (use_stencil) { 379 if (use_stencil) {
560 aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; 380 aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
561 } 381 }
562
563 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, 382 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
564 clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { 383 clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
565 VkClearAttachment attachment; 384 VkClearAttachment attachment;
@@ -580,12 +399,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
580 auto& pipeline = pipeline_cache.GetComputePipeline({ 399 auto& pipeline = pipeline_cache.GetComputePipeline({
581 .shader = code_addr, 400 .shader = code_addr,
582 .shared_memory_size = launch_desc.shared_alloc, 401 .shared_memory_size = launch_desc.shared_alloc,
583 .workgroup_size = 402 .workgroup_size{
584 { 403 launch_desc.block_dim_x,
585 launch_desc.block_dim_x, 404 launch_desc.block_dim_y,
586 launch_desc.block_dim_y, 405 launch_desc.block_dim_z,
587 launch_desc.block_dim_z, 406 },
588 },
589 }); 407 });
590 408
591 // Compute dispatches can't be executed inside a renderpass 409 // Compute dispatches can't be executed inside a renderpass
@@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
594 image_view_indices.clear(); 412 image_view_indices.clear();
595 sampler_handles.clear(); 413 sampler_handles.clear();
596 414
597 auto lock = texture_cache.AcquireLock(); 415 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
598 texture_cache.SynchronizeComputeDescriptors();
599 416
600 const auto& entries = pipeline.GetEntries(); 417 const auto& entries = pipeline.GetEntries();
418 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
419 buffer_cache.UnbindComputeStorageBuffers();
420 u32 ssbo_index = 0;
421 for (const auto& buffer : entries.global_buffers) {
422 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
423 buffer.is_written);
424 ++ssbo_index;
425 }
426 buffer_cache.UpdateComputeBuffers();
427
428 texture_cache.SynchronizeComputeDescriptors();
429
601 SetupComputeUniformTexels(entries); 430 SetupComputeUniformTexels(entries);
602 SetupComputeTextures(entries); 431 SetupComputeTextures(entries);
603 SetupComputeStorageTexels(entries); 432 SetupComputeStorageTexels(entries);
@@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
606 const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 435 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
607 texture_cache.FillComputeImageViews(indices_span, image_view_ids); 436 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
608 437
609 buffer_cache.Map(CalculateComputeStreamBufferSize());
610
611 update_descriptor_queue.Acquire(); 438 update_descriptor_queue.Acquire();
612 439
613 SetupComputeConstBuffers(entries); 440 buffer_cache.BindHostComputeBuffers();
614 SetupComputeGlobalBuffers(entries);
615 441
616 ImageViewId* image_view_id_ptr = image_view_ids.data(); 442 ImageViewId* image_view_id_ptr = image_view_ids.data();
617 VkSampler* sampler_ptr = sampler_handles.data(); 443 VkSampler* sampler_ptr = sampler_handles.data();
618 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, 444 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
619 sampler_ptr); 445 sampler_ptr);
620 446
621 buffer_cache.Unmap();
622
623 const VkPipeline pipeline_handle = pipeline.GetHandle(); 447 const VkPipeline pipeline_handle = pipeline.GetHandle();
624 const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); 448 const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
625 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); 449 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
@@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
644 query_cache.Query(gpu_addr, type, timestamp); 468 query_cache.Query(gpu_addr, type, timestamp);
645} 469}
646 470
471void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
472 u32 size) {
473 buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
474}
475
647void RasterizerVulkan::FlushAll() {} 476void RasterizerVulkan::FlushAll() {}
648 477
649void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { 478void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
@@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
651 return; 480 return;
652 } 481 }
653 { 482 {
654 auto lock = texture_cache.AcquireLock(); 483 std::scoped_lock lock{texture_cache.mutex};
655 texture_cache.DownloadMemory(addr, size); 484 texture_cache.DownloadMemory(addr, size);
656 } 485 }
657 buffer_cache.FlushRegion(addr, size); 486 {
487 std::scoped_lock lock{buffer_cache.mutex};
488 buffer_cache.DownloadMemory(addr, size);
489 }
658 query_cache.FlushRegion(addr, size); 490 query_cache.FlushRegion(addr, size);
659} 491}
660 492
661bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { 493bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
494 std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
662 if (!Settings::IsGPULevelHigh()) { 495 if (!Settings::IsGPULevelHigh()) {
663 return buffer_cache.MustFlushRegion(addr, size); 496 return buffer_cache.IsRegionGpuModified(addr, size);
664 } 497 }
665 return texture_cache.IsRegionGpuModified(addr, size) || 498 return texture_cache.IsRegionGpuModified(addr, size) ||
666 buffer_cache.MustFlushRegion(addr, size); 499 buffer_cache.IsRegionGpuModified(addr, size);
667} 500}
668 501
669void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { 502void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
@@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
671 return; 504 return;
672 } 505 }
673 { 506 {
674 auto lock = texture_cache.AcquireLock(); 507 std::scoped_lock lock{texture_cache.mutex};
675 texture_cache.WriteMemory(addr, size); 508 texture_cache.WriteMemory(addr, size);
676 } 509 }
510 {
511 std::scoped_lock lock{buffer_cache.mutex};
512 buffer_cache.WriteMemory(addr, size);
513 }
677 pipeline_cache.InvalidateRegion(addr, size); 514 pipeline_cache.InvalidateRegion(addr, size);
678 buffer_cache.InvalidateRegion(addr, size);
679 query_cache.InvalidateRegion(addr, size); 515 query_cache.InvalidateRegion(addr, size);
680} 516}
681 517
@@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
683 if (addr == 0 || size == 0) { 519 if (addr == 0 || size == 0) {
684 return; 520 return;
685 } 521 }
522 pipeline_cache.OnCPUWrite(addr, size);
686 { 523 {
687 auto lock = texture_cache.AcquireLock(); 524 std::scoped_lock lock{texture_cache.mutex};
688 texture_cache.WriteMemory(addr, size); 525 texture_cache.WriteMemory(addr, size);
689 } 526 }
690 pipeline_cache.OnCPUWrite(addr, size); 527 {
691 buffer_cache.OnCPUWrite(addr, size); 528 std::scoped_lock lock{buffer_cache.mutex};
529 buffer_cache.CachedWriteMemory(addr, size);
530 }
692} 531}
693 532
694void RasterizerVulkan::SyncGuestHost() { 533void RasterizerVulkan::SyncGuestHost() {
695 buffer_cache.SyncGuestHost();
696 pipeline_cache.SyncGuestHost(); 534 pipeline_cache.SyncGuestHost();
535 {
536 std::scoped_lock lock{buffer_cache.mutex};
537 buffer_cache.FlushCachedWrites();
538 }
697} 539}
698 540
699void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { 541void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
700 { 542 {
701 auto lock = texture_cache.AcquireLock(); 543 std::scoped_lock lock{texture_cache.mutex};
702 texture_cache.UnmapMemory(addr, size); 544 texture_cache.UnmapMemory(addr, size);
703 } 545 }
704 buffer_cache.OnCPUWrite(addr, size); 546 {
547 std::scoped_lock lock{buffer_cache.mutex};
548 buffer_cache.WriteMemory(addr, size);
549 }
705 pipeline_cache.OnCPUWrite(addr, size); 550 pipeline_cache.OnCPUWrite(addr, size);
706} 551}
707 552
@@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() {
774 draw_counter = 0; 619 draw_counter = 0;
775 update_descriptor_queue.TickFrame(); 620 update_descriptor_queue.TickFrame();
776 fence_manager.TickFrame(); 621 fence_manager.TickFrame();
777 buffer_cache.TickFrame();
778 staging_pool.TickFrame(); 622 staging_pool.TickFrame();
779 { 623 {
780 auto lock = texture_cache.AcquireLock(); 624 std::scoped_lock lock{texture_cache.mutex};
781 texture_cache.TickFrame(); 625 texture_cache.TickFrame();
782 } 626 }
627 {
628 std::scoped_lock lock{buffer_cache.mutex};
629 buffer_cache.TickFrame();
630 }
783} 631}
784 632
785bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, 633bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
786 const Tegra::Engines::Fermi2D::Surface& dst, 634 const Tegra::Engines::Fermi2D::Surface& dst,
787 const Tegra::Engines::Fermi2D::Config& copy_config) { 635 const Tegra::Engines::Fermi2D::Config& copy_config) {
788 auto lock = texture_cache.AcquireLock(); 636 std::scoped_lock lock{texture_cache.mutex};
789 texture_cache.BlitImage(dst, src, copy_config); 637 texture_cache.BlitImage(dst, src, copy_config);
790 return true; 638 return true;
791} 639}
@@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
795 if (!framebuffer_addr) { 643 if (!framebuffer_addr) {
796 return false; 644 return false;
797 } 645 }
798 646 std::scoped_lock lock{texture_cache.mutex};
799 auto lock = texture_cache.AcquireLock();
800 ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); 647 ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
801 if (!image_view) { 648 if (!image_view) {
802 return false; 649 return false;
803 } 650 }
804
805 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); 651 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
806 screen_info.width = image_view->size.width; 652 screen_info.width = image_view->size.width;
807 screen_info.height = image_view->size.height; 653 screen_info.height = image_view->size.height;
@@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() {
830 draw_counter = 0; 676 draw_counter = 0;
831} 677}
832 678
833RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
834 BufferBindings& buffer_bindings,
835 bool is_indexed,
836 bool is_instanced) {
837 MICROPROFILE_SCOPE(Vulkan_Geometry);
838
839 const auto& regs = maxwell3d.regs;
840
841 SetupVertexArrays(buffer_bindings);
842
843 const u32 base_instance = regs.vb_base_instance;
844 const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
845 const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
846 const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
847
848 DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
849 SetupIndexBuffer(buffer_bindings, params, is_indexed);
850
851 return params;
852}
853
854void RasterizerVulkan::SetupShaderDescriptors( 679void RasterizerVulkan::SetupShaderDescriptors(
855 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { 680 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
856 image_view_indices.clear(); 681 image_view_indices.clear();
857 sampler_handles.clear(); 682 sampler_handles.clear();
858 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 683 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
@@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors(
860 if (!shader) { 685 if (!shader) {
861 continue; 686 continue;
862 } 687 }
863 const auto& entries = shader->GetEntries(); 688 const ShaderEntries& entries = shader->GetEntries();
864 SetupGraphicsUniformTexels(entries, stage); 689 SetupGraphicsUniformTexels(entries, stage);
865 SetupGraphicsTextures(entries, stage); 690 SetupGraphicsTextures(entries, stage);
866 SetupGraphicsStorageTexels(entries, stage); 691 SetupGraphicsStorageTexels(entries, stage);
867 SetupGraphicsImages(entries, stage); 692 SetupGraphicsImages(entries, stage);
693
694 buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
695 buffer_cache.UnbindGraphicsStorageBuffers(stage);
696 u32 ssbo_index = 0;
697 for (const auto& buffer : entries.global_buffers) {
698 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
699 buffer.cbuf_offset, buffer.is_written);
700 ++ssbo_index;
701 }
868 } 702 }
869 const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 703 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
704 buffer_cache.UpdateGraphicsBuffers(is_indexed);
870 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); 705 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
871 706
707 buffer_cache.BindHostGeometryBuffers(is_indexed);
708
872 update_descriptor_queue.Acquire(); 709 update_descriptor_queue.Acquire();
873 710
874 ImageViewId* image_view_id_ptr = image_view_ids.data(); 711 ImageViewId* image_view_id_ptr = image_view_ids.data();
@@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
879 if (!shader) { 716 if (!shader) {
880 continue; 717 continue;
881 } 718 }
882 const auto& entries = shader->GetEntries(); 719 buffer_cache.BindHostStageBuffers(stage);
883 SetupGraphicsConstBuffers(entries, stage); 720 PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
884 SetupGraphicsGlobalBuffers(entries, stage); 721 image_view_id_ptr, sampler_ptr);
885 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
886 sampler_ptr);
887 } 722 }
888} 723}
889 724
@@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() {
916 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); 751 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
917 return; 752 return;
918 } 753 }
919
920 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || 754 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
921 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 755 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
922 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); 756 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
923 757 scheduler.Record(
924 UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); 758 [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
925 UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
926 UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
927
928 const auto& binding = regs.tfb_bindings[0];
929 UNIMPLEMENTED_IF(binding.buffer_enable == 0);
930 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
931
932 const GPUVAddr gpu_addr = binding.Address();
933 const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
934 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
935
936 scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
937 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
938 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
939 });
940} 759}
941 760
942void RasterizerVulkan::EndTransformFeedback() { 761void RasterizerVulkan::EndTransformFeedback() {
@@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() {
947 if (!device.IsExtTransformFeedbackSupported()) { 766 if (!device.IsExtTransformFeedbackSupported()) {
948 return; 767 return;
949 } 768 }
950
951 scheduler.Record( 769 scheduler.Record(
952 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); 770 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
953} 771}
954 772
955void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
956 const auto& regs = maxwell3d.regs;
957
958 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
959 const auto& vertex_array = regs.vertex_array[index];
960 if (!vertex_array.IsEnabled()) {
961 continue;
962 }
963 const GPUVAddr start{vertex_array.StartAddress()};
964 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
965
966 ASSERT(end >= start);
967 const size_t size = end - start;
968 if (size == 0) {
969 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
970 continue;
971 }
972 const auto info = buffer_cache.UploadMemory(start, size);
973 buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);
974 }
975}
976
977void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
978 bool is_indexed) {
979 if (params.num_vertices == 0) {
980 return;
981 }
982 const auto& regs = maxwell3d.regs;
983 switch (regs.draw.topology) {
984 case Maxwell::PrimitiveTopology::Quads: {
985 if (!params.is_indexed) {
986 const auto [buffer, offset] =
987 quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
988 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
989 params.base_vertex = 0;
990 params.num_vertices = params.num_vertices * 6 / 4;
991 params.is_indexed = true;
992 break;
993 }
994 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
995 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
996 VkBuffer buffer = info.handle;
997 u64 offset = info.offset;
998 std::tie(buffer, offset) = quad_indexed_pass.Assemble(
999 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
1000
1001 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
1002 params.num_vertices = (params.num_vertices / 4) * 6;
1003 params.base_vertex = 0;
1004 break;
1005 }
1006 default: {
1007 if (!is_indexed) {
1008 break;
1009 }
1010 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
1011 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
1012 VkBuffer buffer = info.handle;
1013 u64 offset = info.offset;
1014
1015 auto format = regs.index_array.format;
1016 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
1017 if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
1018 std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
1019 format = Maxwell::IndexFormat::UnsignedShort;
1020 }
1021
1022 buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
1023 break;
1024 }
1025 }
1026}
1027
1028void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {
1029 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
1030 const auto& shader_stage = maxwell3d.state.shader_stages[stage];
1031 for (const auto& entry : entries.const_buffers) {
1032 SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
1033 }
1034}
1035
1036void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {
1037 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
1038 const auto& cbufs{maxwell3d.state.shader_stages[stage]};
1039
1040 for (const auto& entry : entries.global_buffers) {
1041 const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
1042 SetupGlobalBuffer(entry, addr);
1043 }
1044}
1045
1046void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { 773void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
1047 MICROPROFILE_SCOPE(Vulkan_Textures);
1048 const auto& regs = maxwell3d.regs; 774 const auto& regs = maxwell3d.regs;
1049 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 775 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1050 for (const auto& entry : entries.uniform_texels) { 776 for (const auto& entry : entries.uniform_texels) {
@@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries,
1054} 780}
1055 781
1056void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { 782void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
1057 MICROPROFILE_SCOPE(Vulkan_Textures);
1058 const auto& regs = maxwell3d.regs; 783 const auto& regs = maxwell3d.regs;
1059 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 784 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1060 for (const auto& entry : entries.samplers) { 785 for (const auto& entry : entries.samplers) {
@@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_
1070} 795}
1071 796
1072void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { 797void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
1073 MICROPROFILE_SCOPE(Vulkan_Textures);
1074 const auto& regs = maxwell3d.regs; 798 const auto& regs = maxwell3d.regs;
1075 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 799 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1076 for (const auto& entry : entries.storage_texels) { 800 for (const auto& entry : entries.storage_texels) {
@@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries,
1080} 804}
1081 805
1082void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { 806void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
1083 MICROPROFILE_SCOPE(Vulkan_Images);
1084 const auto& regs = maxwell3d.regs; 807 const auto& regs = maxwell3d.regs;
1085 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 808 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1086 for (const auto& entry : entries.images) { 809 for (const auto& entry : entries.images) {
@@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t
1089 } 812 }
1090} 813}
1091 814
1092void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
1093 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
1094 const auto& launch_desc = kepler_compute.launch_description;
1095 for (const auto& entry : entries.const_buffers) {
1096 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
1097 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
1098 const Tegra::Engines::ConstBufferInfo info{
1099 .address = config.Address(),
1100 .size = config.size,
1101 .enabled = mask[entry.GetIndex()],
1102 };
1103 SetupConstBuffer(entry, info);
1104 }
1105}
1106
1107void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
1108 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
1109 const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
1110 for (const auto& entry : entries.global_buffers) {
1111 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
1112 SetupGlobalBuffer(entry, addr);
1113 }
1114}
1115
1116void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { 815void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1117 MICROPROFILE_SCOPE(Vulkan_Textures);
1118 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 816 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1119 for (const auto& entry : entries.uniform_texels) { 817 for (const auto& entry : entries.uniform_texels) {
1120 const TextureHandle handle = 818 const TextureHandle handle =
@@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1124} 822}
1125 823
1126void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { 824void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1127 MICROPROFILE_SCOPE(Vulkan_Textures);
1128 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 825 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1129 for (const auto& entry : entries.samplers) { 826 for (const auto& entry : entries.samplers) {
1130 for (size_t index = 0; index < entry.size; ++index) { 827 for (size_t index = 0; index < entry.size; ++index) {
@@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1139} 836}
1140 837
1141void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { 838void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1142 MICROPROFILE_SCOPE(Vulkan_Textures);
1143 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 839 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1144 for (const auto& entry : entries.storage_texels) { 840 for (const auto& entry : entries.storage_texels) {
1145 const TextureHandle handle = 841 const TextureHandle handle =
@@ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1149} 845}
1150 846
1151void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { 847void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1152 MICROPROFILE_SCOPE(Vulkan_Images);
1153 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 848 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1154 for (const auto& entry : entries.images) { 849 for (const auto& entry : entries.images) {
1155 const TextureHandle handle = 850 const TextureHandle handle =
@@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1158 } 853 }
1159} 854}
1160 855
1161void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
1162 const Tegra::Engines::ConstBufferInfo& buffer) {
1163 if (!buffer.enabled) {
1164 // Set values to zero to unbind buffers
1165 update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
1166 return;
1167 }
1168 // Align the size to avoid bad std140 interactions
1169 const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1170 ASSERT(size <= MaxConstbufferSize);
1171
1172 const u64 alignment = device.GetUniformBufferAlignment();
1173 const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);
1174 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1175}
1176
1177void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
1178 const u64 actual_addr = gpu_memory.Read<u64>(address);
1179 const u32 size = gpu_memory.Read<u32>(address + 8);
1180
1181 if (size == 0) {
1182 // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
1183 // because Vulkan doesn't like empty buffers.
1184 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
1185 // default buffer.
1186 static constexpr size_t dummy_size = 4;
1187 const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
1188 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
1189 return;
1190 }
1191
1192 const auto info = buffer_cache.UploadMemory(
1193 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
1194 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1195}
1196
1197void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 856void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
1198 if (!state_tracker.TouchViewports()) { 857 if (!state_tracker.TouchViewports()) {
1199 return; 858 return;
@@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
1206 GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), 865 GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
1207 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), 866 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
1208 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), 867 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
1209 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; 868 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
869 };
1210 scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); 870 scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
1211} 871}
1212 872
@@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
1214 if (!state_tracker.TouchScissors()) { 874 if (!state_tracker.TouchScissors()) {
1215 return; 875 return;
1216 } 876 }
1217 const std::array scissors = { 877 const std::array scissors{
1218 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), 878 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
1219 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), 879 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
1220 GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), 880 GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
1221 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), 881 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
1222 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), 882 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
1223 GetScissorState(regs, 15)}; 883 GetScissorState(regs, 15),
884 };
1224 scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); 885 scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
1225} 886}
1226 887
@@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
1385 }); 1046 });
1386} 1047}
1387 1048
1388size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
1389 size_t size = CalculateVertexArraysSize();
1390 if (is_indexed) {
1391 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
1392 }
1393 size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
1394 return size;
1395}
1396
1397size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
1398 return Tegra::Engines::KeplerCompute::NumConstBuffers *
1399 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
1400}
1401
1402size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1403 const auto& regs = maxwell3d.regs;
1404
1405 size_t size = 0;
1406 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
1407 // This implementation assumes that all attributes are used in the shader.
1408 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
1409 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1410 DEBUG_ASSERT(end >= start);
1411
1412 size += (end - start) * regs.vertex_array[index].enable;
1413 }
1414 return size;
1415}
1416
1417size_t RasterizerVulkan::CalculateIndexBufferSize() const {
1418 return static_cast<size_t>(maxwell3d.regs.index_array.count) *
1419 static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
1420}
1421
1422size_t RasterizerVulkan::CalculateConstBufferSize(
1423 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
1424 if (entry.IsIndirect()) {
1425 // Buffer is accessed indirectly, so upload the entire thing
1426 return buffer.size;
1427 } else {
1428 // Buffer is accessed directly, upload just what we use
1429 return entry.GetSize();
1430 }
1431}
1432
1433VkBuffer RasterizerVulkan::DefaultBuffer() {
1434 if (default_buffer) {
1435 return *default_buffer;
1436 }
1437 default_buffer = device.GetLogical().CreateBuffer({
1438 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1439 .pNext = nullptr,
1440 .flags = 0,
1441 .size = DEFAULT_BUFFER_SIZE,
1442 .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
1443 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
1444 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1445 .queueFamilyIndexCount = 0,
1446 .pQueueFamilyIndices = nullptr,
1447 });
1448 default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal);
1449
1450 scheduler.RequestOutsideRenderPassOperationContext();
1451 scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
1452 cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0);
1453 });
1454 return *default_buffer;
1455}
1456
1457} // namespace Vulkan 1049} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 8e261b9bd..7fc6741da 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -18,14 +18,12 @@
18#include "video_core/renderer_vulkan/blit_image.h" 18#include "video_core/renderer_vulkan/blit_image.h"
19#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 19#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
20#include "video_core/renderer_vulkan/vk_buffer_cache.h" 20#include "video_core/renderer_vulkan/vk_buffer_cache.h"
21#include "video_core/renderer_vulkan/vk_compute_pass.h"
22#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 21#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
23#include "video_core/renderer_vulkan/vk_fence_manager.h" 22#include "video_core/renderer_vulkan/vk_fence_manager.h"
24#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 23#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
25#include "video_core/renderer_vulkan/vk_query_cache.h" 24#include "video_core/renderer_vulkan/vk_query_cache.h"
26#include "video_core/renderer_vulkan/vk_scheduler.h" 25#include "video_core/renderer_vulkan/vk_scheduler.h"
27#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 26#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
28#include "video_core/renderer_vulkan/vk_stream_buffer.h"
29#include "video_core/renderer_vulkan/vk_texture_cache.h" 27#include "video_core/renderer_vulkan/vk_texture_cache.h"
30#include "video_core/renderer_vulkan/vk_update_descriptor.h" 28#include "video_core/renderer_vulkan/vk_update_descriptor.h"
31#include "video_core/shader/async_shaders.h" 29#include "video_core/shader/async_shaders.h"
@@ -49,7 +47,6 @@ namespace Vulkan {
49struct VKScreenInfo; 47struct VKScreenInfo;
50 48
51class StateTracker; 49class StateTracker;
52class BufferBindings;
53 50
54class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { 51class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
55public: 52public:
@@ -65,6 +62,7 @@ public:
65 void DispatchCompute(GPUVAddr code_addr) override; 62 void DispatchCompute(GPUVAddr code_addr) override;
66 void ResetCounter(VideoCore::QueryType type) override; 63 void ResetCounter(VideoCore::QueryType type) override;
67 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 64 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
65 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
68 void FlushAll() override; 66 void FlushAll() override;
69 void FlushRegion(VAddr addr, u64 size) override; 67 void FlushRegion(VAddr addr, u64 size) override;
70 bool MustFlushRegion(VAddr addr, u64 size) override; 68 bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -107,24 +105,11 @@ private:
107 105
108 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); 106 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
109 107
110 struct DrawParameters {
111 void Draw(vk::CommandBuffer cmdbuf) const;
112
113 u32 base_instance = 0;
114 u32 num_instances = 0;
115 u32 base_vertex = 0;
116 u32 num_vertices = 0;
117 bool is_indexed = 0;
118 };
119
120 void FlushWork(); 108 void FlushWork();
121 109
122 /// Setups geometry buffers and state.
123 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
124 bool is_indexed, bool is_instanced);
125
126 /// Setup descriptors in the graphics pipeline. 110 /// Setup descriptors in the graphics pipeline.
127 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); 111 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
112 bool is_indexed);
128 113
129 void UpdateDynamicStates(); 114 void UpdateDynamicStates();
130 115
@@ -132,16 +117,6 @@ private:
132 117
133 void EndTransformFeedback(); 118 void EndTransformFeedback();
134 119
135 void SetupVertexArrays(BufferBindings& buffer_bindings);
136
137 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
138
139 /// Setup constant buffers in the graphics pipeline.
140 void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
141
142 /// Setup global buffers in the graphics pipeline.
143 void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
144
145 /// Setup uniform texels in the graphics pipeline. 120 /// Setup uniform texels in the graphics pipeline.
146 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); 121 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
147 122
@@ -154,12 +129,6 @@ private:
154 /// Setup images in the graphics pipeline. 129 /// Setup images in the graphics pipeline.
155 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); 130 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
156 131
157 /// Setup constant buffers in the compute pipeline.
158 void SetupComputeConstBuffers(const ShaderEntries& entries);
159
160 /// Setup global buffers in the compute pipeline.
161 void SetupComputeGlobalBuffers(const ShaderEntries& entries);
162
163 /// Setup texel buffers in the compute pipeline. 132 /// Setup texel buffers in the compute pipeline.
164 void SetupComputeUniformTexels(const ShaderEntries& entries); 133 void SetupComputeUniformTexels(const ShaderEntries& entries);
165 134
@@ -172,11 +141,6 @@ private:
172 /// Setup images in the compute pipeline. 141 /// Setup images in the compute pipeline.
173 void SetupComputeImages(const ShaderEntries& entries); 142 void SetupComputeImages(const ShaderEntries& entries);
174 143
175 void SetupConstBuffer(const ConstBufferEntry& entry,
176 const Tegra::Engines::ConstBufferInfo& buffer);
177
178 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
179
180 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 144 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
181 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 145 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
182 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); 146 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -193,19 +157,6 @@ private:
193 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); 157 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
194 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 158 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
195 159
196 size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
197
198 size_t CalculateComputeStreamBufferSize() const;
199
200 size_t CalculateVertexArraysSize() const;
201
202 size_t CalculateIndexBufferSize() const;
203
204 size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
205 const Tegra::Engines::ConstBufferInfo& buffer) const;
206
207 VkBuffer DefaultBuffer();
208
209 Tegra::GPU& gpu; 160 Tegra::GPU& gpu;
210 Tegra::MemoryManager& gpu_memory; 161 Tegra::MemoryManager& gpu_memory;
211 Tegra::Engines::Maxwell3D& maxwell3d; 162 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -217,24 +168,19 @@ private:
217 StateTracker& state_tracker; 168 StateTracker& state_tracker;
218 VKScheduler& scheduler; 169 VKScheduler& scheduler;
219 170
220 VKStreamBuffer stream_buffer;
221 StagingBufferPool staging_pool; 171 StagingBufferPool staging_pool;
222 VKDescriptorPool descriptor_pool; 172 VKDescriptorPool descriptor_pool;
223 VKUpdateDescriptorQueue update_descriptor_queue; 173 VKUpdateDescriptorQueue update_descriptor_queue;
224 BlitImageHelper blit_image; 174 BlitImageHelper blit_image;
225 QuadArrayPass quad_array_pass;
226 QuadIndexedPass quad_indexed_pass;
227 Uint8Pass uint8_pass;
228 175
229 TextureCacheRuntime texture_cache_runtime; 176 TextureCacheRuntime texture_cache_runtime;
230 TextureCache texture_cache; 177 TextureCache texture_cache;
178 BufferCacheRuntime buffer_cache_runtime;
179 BufferCache buffer_cache;
231 VKPipelineCache pipeline_cache; 180 VKPipelineCache pipeline_cache;
232 VKBufferCache buffer_cache;
233 VKQueryCache query_cache; 181 VKQueryCache query_cache;
234 VKFenceManager fence_manager; 182 VKFenceManager fence_manager;
235 183
236 vk::Buffer default_buffer;
237 MemoryCommit default_buffer_commit;
238 vk::Event wfi_event; 184 vk::Event wfi_event;
239 VideoCommon::Shader::AsyncShaders async_shaders; 185 VideoCommon::Shader::AsyncShaders async_shaders;
240 186
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 66004f9c0..f35c120b0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() {
52 worker_thread.join(); 52 worker_thread.join();
53} 53}
54 54
55u64 VKScheduler::CurrentTick() const noexcept {
56 return master_semaphore->CurrentTick();
57}
58
59bool VKScheduler::IsFree(u64 tick) const noexcept {
60 return master_semaphore->IsFree(tick);
61}
62
63void VKScheduler::Wait(u64 tick) {
64 master_semaphore->Wait(tick);
65}
66
67void VKScheduler::Flush(VkSemaphore semaphore) { 55void VKScheduler::Flush(VkSemaphore semaphore) {
68 SubmitExecution(semaphore); 56 SubmitExecution(semaphore);
69 AllocateNewContext(); 57 AllocateNewContext();
@@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() {
269 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | 257 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
270 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | 258 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
271 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 259 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
272 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, 260 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
273 vk::Span(barriers.data(), num_images)); 261 vk::Span(barriers.data(), num_images));
274 }); 262 });
275 state.renderpass = nullptr; 263 state.renderpass = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 15f2987eb..3ce48e9d2 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -14,6 +14,7 @@
14#include "common/alignment.h" 14#include "common/alignment.h"
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/threadsafe_queue.h" 16#include "common/threadsafe_queue.h"
17#include "video_core/renderer_vulkan/vk_master_semaphore.h"
17#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
18 19
19namespace Vulkan { 20namespace Vulkan {
@@ -21,7 +22,6 @@ namespace Vulkan {
21class CommandPool; 22class CommandPool;
22class Device; 23class Device;
23class Framebuffer; 24class Framebuffer;
24class MasterSemaphore;
25class StateTracker; 25class StateTracker;
26class VKQueryCache; 26class VKQueryCache;
27 27
@@ -32,15 +32,6 @@ public:
32 explicit VKScheduler(const Device& device, StateTracker& state_tracker); 32 explicit VKScheduler(const Device& device, StateTracker& state_tracker);
33 ~VKScheduler(); 33 ~VKScheduler();
34 34
35 /// Returns the current command buffer tick.
36 [[nodiscard]] u64 CurrentTick() const noexcept;
37
38 /// Returns true when a tick has been triggered by the GPU.
39 [[nodiscard]] bool IsFree(u64 tick) const noexcept;
40
41 /// Waits for the given tick to trigger on the GPU.
42 void Wait(u64 tick);
43
44 /// Sends the current execution context to the GPU. 35 /// Sends the current execution context to the GPU.
45 void Flush(VkSemaphore semaphore = nullptr); 36 void Flush(VkSemaphore semaphore = nullptr);
46 37
@@ -82,6 +73,21 @@ public:
82 (void)chunk->Record(command); 73 (void)chunk->Record(command);
83 } 74 }
84 75
76 /// Returns the current command buffer tick.
77 [[nodiscard]] u64 CurrentTick() const noexcept {
78 return master_semaphore->CurrentTick();
79 }
80
81 /// Returns true when a tick has been triggered by the GPU.
82 [[nodiscard]] bool IsFree(u64 tick) const noexcept {
83 return master_semaphore->IsFree(tick);
84 }
85
86 /// Waits for the given tick to trigger on the GPU.
87 void Wait(u64 tick) {
88 master_semaphore->Wait(tick);
89 }
90
85 /// Returns the master timeline semaphore. 91 /// Returns the master timeline semaphore.
86 [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { 92 [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
87 return *master_semaphore; 93 return *master_semaphore;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 61d52b961..e165a6987 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -3127,6 +3127,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3127 entries.attributes.insert(GetGenericAttributeLocation(attribute)); 3127 entries.attributes.insert(GetGenericAttributeLocation(attribute));
3128 } 3128 }
3129 } 3129 }
3130 for (const auto& buffer : entries.const_buffers) {
3131 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
3132 }
3130 entries.clip_distances = ir.GetClipDistances(); 3133 entries.clip_distances = ir.GetClipDistances();
3131 entries.shader_length = ir.GetLength(); 3134 entries.shader_length = ir.GetLength();
3132 entries.uses_warps = ir.UsesWarps(); 3135 entries.uses_warps = ir.UsesWarps();
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 26381e444..5d94132a5 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -39,24 +39,7 @@ private:
39 u32 index{}; 39 u32 index{};
40}; 40};
41 41
42class GlobalBufferEntry { 42struct GlobalBufferEntry {
43public:
44 constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
45 : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
46
47 constexpr u32 GetCbufIndex() const {
48 return cbuf_index;
49 }
50
51 constexpr u32 GetCbufOffset() const {
52 return cbuf_offset;
53 }
54
55 constexpr bool IsWritten() const {
56 return is_written;
57 }
58
59private:
60 u32 cbuf_index{}; 43 u32 cbuf_index{};
61 u32 cbuf_offset{}; 44 u32 cbuf_offset{};
62 bool is_written{}; 45 bool is_written{};
@@ -78,6 +61,7 @@ struct ShaderEntries {
78 std::set<u32> attributes; 61 std::set<u32> attributes;
79 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 62 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
80 std::size_t shader_length{}; 63 std::size_t shader_length{};
64 u32 enabled_uniform_buffers{};
81 bool uses_warps{}; 65 bool uses_warps{};
82}; 66};
83 67
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 1779a2e30..e81fad007 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table;
30using Flags = Maxwell3D::DirtyState::Flags; 30using Flags = Maxwell3D::DirtyState::Flags;
31 31
32Flags MakeInvalidationFlags() { 32Flags MakeInvalidationFlags() {
33 static constexpr std::array INVALIDATION_FLAGS{ 33 static constexpr int INVALIDATION_FLAGS[]{
34 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, 34 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
35 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, 35 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
36 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, 36 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
37 }; 37 };
38 Flags flags{}; 38 Flags flags{};
39 for (const int flag : INVALIDATION_FLAGS) { 39 for (const int flag : INVALIDATION_FLAGS) {
40 flags[flag] = true; 40 flags[flag] = true;
41 } 41 }
42 for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
43 flags[index] = true;
44 }
42 return flags; 45 return flags;
43} 46}
44 47
@@ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
130StateTracker::StateTracker(Tegra::GPU& gpu) 133StateTracker::StateTracker(Tegra::GPU& gpu)
131 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { 134 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
132 auto& tables = gpu.Maxwell3D().dirty.tables; 135 auto& tables = gpu.Maxwell3D().dirty.tables;
133 SetupDirtyRenderTargets(tables); 136 SetupDirtyFlags(tables);
134 SetupDirtyViewports(tables); 137 SetupDirtyViewports(tables);
135 SetupDirtyScissors(tables); 138 SetupDirtyScissors(tables);
136 SetupDirtyDepthBias(tables); 139 SetupDirtyDepthBias(tables);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index aa7c5d7c6..1eeb45ca9 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -426,46 +426,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
426void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, 426void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
427 VkImageAspectFlags aspect_mask, bool is_initialized, 427 VkImageAspectFlags aspect_mask, bool is_initialized,
428 std::span<const VkBufferImageCopy> copies) { 428 std::span<const VkBufferImageCopy> copies) {
429 static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | 429 static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
430 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 430 VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
431 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; 431 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
432 static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
433 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
434 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
432 const VkImageMemoryBarrier read_barrier{ 435 const VkImageMemoryBarrier read_barrier{
433 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 436 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
434 .pNext = nullptr, 437 .pNext = nullptr,
435 .srcAccessMask = ACCESS_FLAGS, 438 .srcAccessMask = WRITE_ACCESS_FLAGS,
436 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 439 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
437 .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, 440 .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
438 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 441 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
439 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 442 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
440 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 443 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
441 .image = image, 444 .image = image,
442 .subresourceRange = 445 .subresourceRange{
443 { 446 .aspectMask = aspect_mask,
444 .aspectMask = aspect_mask, 447 .baseMipLevel = 0,
445 .baseMipLevel = 0, 448 .levelCount = VK_REMAINING_MIP_LEVELS,
446 .levelCount = VK_REMAINING_MIP_LEVELS, 449 .baseArrayLayer = 0,
447 .baseArrayLayer = 0, 450 .layerCount = VK_REMAINING_ARRAY_LAYERS,
448 .layerCount = VK_REMAINING_ARRAY_LAYERS, 451 },
449 },
450 }; 452 };
451 const VkImageMemoryBarrier write_barrier{ 453 const VkImageMemoryBarrier write_barrier{
452 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 454 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
453 .pNext = nullptr, 455 .pNext = nullptr,
454 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 456 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
455 .dstAccessMask = ACCESS_FLAGS, 457 .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
456 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 458 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
457 .newLayout = VK_IMAGE_LAYOUT_GENERAL, 459 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
458 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 460 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
459 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 461 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
460 .image = image, 462 .image = image,
461 .subresourceRange = 463 .subresourceRange{
462 { 464 .aspectMask = aspect_mask,
463 .aspectMask = aspect_mask, 465 .baseMipLevel = 0,
464 .baseMipLevel = 0, 466 .levelCount = VK_REMAINING_MIP_LEVELS,
465 .levelCount = VK_REMAINING_MIP_LEVELS, 467 .baseArrayLayer = 0,
466 .baseArrayLayer = 0, 468 .layerCount = VK_REMAINING_ARRAY_LAYERS,
467 .layerCount = VK_REMAINING_ARRAY_LAYERS, 469 },
468 },
469 }; 470 };
470 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 471 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
471 read_barrier); 472 read_barrier);
@@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() {
569 scheduler.Finish(); 570 scheduler.Finish();
570} 571}
571 572
572ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { 573StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
573 const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload); 574 return staging_buffer_pool.Request(size, MemoryUsage::Upload);
574 return {
575 .handle = staging_ref.buffer,
576 .span = staging_ref.mapped_span,
577 };
578} 575}
579 576
580ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { 577StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
581 const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download); 578 return staging_buffer_pool.Request(size, MemoryUsage::Download);
582 return {
583 .handle = staging_ref.buffer,
584 .span = staging_ref.mapped_span,
585 };
586} 579}
587 580
588void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, 581void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
@@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
754 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 747 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
755 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | 748 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
756 VK_ACCESS_TRANSFER_WRITE_BIT, 749 VK_ACCESS_TRANSFER_WRITE_BIT,
757 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 750 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
758 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, 751 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
759 .newLayout = VK_IMAGE_LAYOUT_GENERAL, 752 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
760 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 753 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
765 VkImageMemoryBarrier{ 758 VkImageMemoryBarrier{
766 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 759 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
767 .pNext = nullptr, 760 .pNext = nullptr,
768 .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | 761 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
769 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
770 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
771 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
772 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | 762 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
773 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, 763 VK_ACCESS_TRANSFER_WRITE_BIT,
774 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 764 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
775 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, 765 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
776 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 766 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@@ -828,12 +818,12 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
828 } 818 }
829} 819}
830 820
831void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 821void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
832 std::span<const BufferImageCopy> copies) { 822 std::span<const BufferImageCopy> copies) {
833 // TODO: Move this to another API 823 // TODO: Move this to another API
834 scheduler->RequestOutsideRenderPassOperationContext(); 824 scheduler->RequestOutsideRenderPassOperationContext();
835 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); 825 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
836 const VkBuffer src_buffer = map.handle; 826 const VkBuffer src_buffer = map.buffer;
837 const VkImage vk_image = *image; 827 const VkImage vk_image = *image;
838 const VkImageAspectFlags vk_aspect_mask = aspect_mask; 828 const VkImageAspectFlags vk_aspect_mask = aspect_mask;
839 const bool is_initialized = std::exchange(initialized, true); 829 const bool is_initialized = std::exchange(initialized, true);
@@ -843,12 +833,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
843 }); 833 });
844} 834}
845 835
846void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 836void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
847 std::span<const VideoCommon::BufferCopy> copies) { 837 std::span<const VideoCommon::BufferCopy> copies) {
848 // TODO: Move this to another API 838 // TODO: Move this to another API
849 scheduler->RequestOutsideRenderPassOperationContext(); 839 scheduler->RequestOutsideRenderPassOperationContext();
850 std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); 840 std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
851 const VkBuffer src_buffer = map.handle; 841 const VkBuffer src_buffer = map.buffer;
852 const VkBuffer dst_buffer = *buffer; 842 const VkBuffer dst_buffer = *buffer;
853 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { 843 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
854 // TODO: Barriers 844 // TODO: Barriers
@@ -856,13 +846,58 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
856 }); 846 });
857} 847}
858 848
859void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, 849void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
860 std::span<const BufferImageCopy> copies) { 850 std::span<const BufferImageCopy> copies) {
861 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); 851 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
862 scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, 852 scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
863 vk_copies](vk::CommandBuffer cmdbuf) { 853 vk_copies](vk::CommandBuffer cmdbuf) {
864 // TODO: Barriers 854 const VkImageMemoryBarrier read_barrier{
865 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); 855 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
856 .pNext = nullptr,
857 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
858 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
859 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
860 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
861 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
862 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
863 .image = image,
864 .subresourceRange{
865 .aspectMask = aspect_mask,
866 .baseMipLevel = 0,
867 .levelCount = VK_REMAINING_MIP_LEVELS,
868 .baseArrayLayer = 0,
869 .layerCount = VK_REMAINING_ARRAY_LAYERS,
870 },
871 };
872 const VkImageMemoryBarrier image_write_barrier{
873 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
874 .pNext = nullptr,
875 .srcAccessMask = 0,
876 .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
877 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
878 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
879 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
880 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
881 .image = image,
882 .subresourceRange{
883 .aspectMask = aspect_mask,
884 .baseMipLevel = 0,
885 .levelCount = VK_REMAINING_MIP_LEVELS,
886 .baseArrayLayer = 0,
887 .layerCount = VK_REMAINING_ARRAY_LAYERS,
888 },
889 };
890 const VkMemoryBarrier memory_write_barrier{
891 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
892 .pNext = nullptr,
893 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
894 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
895 };
896 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
897 0, read_barrier);
898 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
899 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
900 0, memory_write_barrier, nullptr, image_write_barrier);
866 }); 901 });
867} 902}
868 903
@@ -1127,7 +1162,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1127 .pAttachments = attachments.data(), 1162 .pAttachments = attachments.data(),
1128 .width = key.size.width, 1163 .width = key.size.width,
1129 .height = key.size.height, 1164 .height = key.size.height,
1130 .layers = static_cast<u32>(num_layers), 1165 .layers = static_cast<u32>(std::max(num_layers, 1)),
1131 }); 1166 });
1132 if (runtime.device.HasDebuggingToolAttached()) { 1167 if (runtime.device.HasDebuggingToolAttached()) {
1133 framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); 1168 framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 8d29361a1..4558c3297 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
7#include <compare> 7#include <compare>
8#include <span> 8#include <span>
9 9
10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
10#include "video_core/texture_cache/texture_cache.h" 11#include "video_core/texture_cache/texture_cache.h"
11#include "video_core/vulkan_common/vulkan_memory_allocator.h" 12#include "video_core/vulkan_common/vulkan_memory_allocator.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> {
53 54
54namespace Vulkan { 55namespace Vulkan {
55 56
56struct ImageBufferMap {
57 [[nodiscard]] VkBuffer Handle() const noexcept {
58 return handle;
59 }
60
61 [[nodiscard]] std::span<u8> Span() const noexcept {
62 return span;
63 }
64
65 VkBuffer handle;
66 std::span<u8> span;
67};
68
69struct TextureCacheRuntime { 57struct TextureCacheRuntime {
70 const Device& device; 58 const Device& device;
71 VKScheduler& scheduler; 59 VKScheduler& scheduler;
@@ -76,9 +64,9 @@ struct TextureCacheRuntime {
76 64
77 void Finish(); 65 void Finish();
78 66
79 [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); 67 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
80 68
81 [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size); 69 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
82 70
83 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, 71 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
84 const std::array<Offset2D, 2>& dst_region, 72 const std::array<Offset2D, 2>& dst_region,
@@ -94,7 +82,7 @@ struct TextureCacheRuntime {
94 return false; 82 return false;
95 } 83 }
96 84
97 void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, 85 void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t,
98 std::span<const VideoCommon::SwizzleParameters>) { 86 std::span<const VideoCommon::SwizzleParameters>) {
99 UNREACHABLE(); 87 UNREACHABLE();
100 } 88 }
@@ -112,13 +100,13 @@ public:
112 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, 100 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
113 VAddr cpu_addr); 101 VAddr cpu_addr);
114 102
115 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 103 void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
116 std::span<const VideoCommon::BufferImageCopy> copies); 104 std::span<const VideoCommon::BufferImageCopy> copies);
117 105
118 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 106 void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
119 std::span<const VideoCommon::BufferCopy> copies); 107 std::span<const VideoCommon::BufferCopy> copies);
120 108
121 void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, 109 void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
122 std::span<const VideoCommon::BufferImageCopy> copies); 110 std::span<const VideoCommon::BufferImageCopy> copies);
123 111
124 [[nodiscard]] VkImage Handle() const noexcept { 112 [[nodiscard]] VkImage Handle() const noexcept {