path: root/src/video_core/renderer_vulkan
author     bunnei  2021-02-12 22:22:18 -0800
committer  GitHub  2021-02-12 22:22:18 -0800
commit     d3c7a7e7cf4bcabb171c98fe55e6e0291f8ee980 (patch)
tree       5c900d10847e1768a4951c1e6bec35f2618b5991 /src/video_core/renderer_vulkan
parent     Merge pull request #5877 from ameerj/res-limit-usage (diff)
parent     config: Make high GPU accuracy the default (diff)
download   yuzu-d3c7a7e7cf4bcabb171c98fe55e6e0291f8ee980.tar.gz
           yuzu-d3c7a7e7cf4bcabb171c98fe55e6e0291f8ee980.tar.xz
           yuzu-d3c7a7e7cf4bcabb171c98fe55e6e0291f8ee980.zip
Merge pull request #5741 from ReinUsesLisp/new-bufcache
video_core: Reimplement the buffer cache
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp          |    6
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h            |    2
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp        |  153
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h          |   46
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp         |   20
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.h           |   15
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp        |  394
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h          |  119
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp        |  144
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h          |   27
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.cpp       |    4
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.h         |   11
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp          |  664
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h            |   64
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp           |   14
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h             |   26
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp   |    9
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.h     |   20
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp |  155
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.h   |   24
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.cpp       |    9
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp           |    7
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.h             |    3
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp       |  139
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h         |   27
25 files changed, 921 insertions(+), 1181 deletions(-)
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 85121d9fd..19aaf034f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
531 return {}; 531 return {};
532} 532}
533 533
534VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { 534VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
535 switch (index_format) { 535 switch (index_format) {
536 case Maxwell::IndexFormat::UnsignedByte: 536 case Maxwell::IndexFormat::UnsignedByte:
537 if (!device.IsExtIndexTypeUint8Supported()) {
538 UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
539 return VK_INDEX_TYPE_UINT16;
540 }
541 return VK_INDEX_TYPE_UINT8_EXT; 537 return VK_INDEX_TYPE_UINT8_EXT;
542 case Maxwell::IndexFormat::UnsignedShort: 538 case Maxwell::IndexFormat::UnsignedShort:
543 return VK_INDEX_TYPE_UINT16; 539 return VK_INDEX_TYPE_UINT16;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7c34b47dc..e3e06ba38 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
53 53
54VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); 54VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
55 55
56VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); 56VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
57 57
58VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); 58VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
59 59
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 61796e33a..1cc720ddd 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
80 return separated_extensions; 80 return separated_extensions;
81} 81}
82 82
83Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
84 VkSurfaceKHR surface) {
85 const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
86 const s32 device_index = Settings::values.vulkan_device.GetValue();
87 if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
88 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
89 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
90 }
91 const vk::PhysicalDevice physical_device(devices[device_index], dld);
92 return Device(*instance, physical_device, surface, dld);
93}
83} // Anonymous namespace 94} // Anonymous namespace
84 95
85RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, 96RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
86 Core::Frontend::EmuWindow& emu_window, 97 Core::Frontend::EmuWindow& emu_window,
87 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 98 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
88 std::unique_ptr<Core::Frontend::GraphicsContext> context_) 99 std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
89 : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_}, 100 : RendererBase(emu_window, std::move(context_)),
90 cpu_memory{cpu_memory_}, gpu{gpu_} {} 101 telemetry_session(telemetry_session_),
102 cpu_memory(cpu_memory_),
103 gpu(gpu_),
104 library(OpenLibrary()),
105 instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
106 true, Settings::values.renderer_debug)),
107 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
108 surface(CreateSurface(instance, render_window)),
109 device(CreateDevice(instance, dld, *surface)),
110 memory_allocator(device, false),
111 state_tracker(gpu),
112 scheduler(device, state_tracker),
113 swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
114 render_window.GetFramebufferLayout().height, false),
115 blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
116 screen_info),
117 rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
118 memory_allocator, state_tracker, scheduler) {
119 Report();
120} catch (const vk::Exception& exception) {
121 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
122 throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
123}
91 124
92RendererVulkan::~RendererVulkan() { 125RendererVulkan::~RendererVulkan() {
93 ShutDown(); 126 void(device.GetLogical().WaitIdle());
94} 127}
95 128
96void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 129void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
101 if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { 134 if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
102 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; 135 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
103 const bool use_accelerated = 136 const bool use_accelerated =
104 rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); 137 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
105 const bool is_srgb = use_accelerated && screen_info.is_srgb; 138 const bool is_srgb = use_accelerated && screen_info.is_srgb;
106 if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) { 139 if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
107 swapchain->Create(layout.width, layout.height, is_srgb); 140 swapchain.Create(layout.width, layout.height, is_srgb);
108 blit_screen->Recreate(); 141 blit_screen.Recreate();
109 } 142 }
110 143
111 scheduler->WaitWorker(); 144 scheduler.WaitWorker();
112 145
113 swapchain->AcquireNextImage(); 146 swapchain.AcquireNextImage();
114 const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated); 147 const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
115 148
116 scheduler->Flush(render_semaphore); 149 scheduler.Flush(render_semaphore);
117 150
118 if (swapchain->Present(render_semaphore)) { 151 if (swapchain.Present(render_semaphore)) {
119 blit_screen->Recreate(); 152 blit_screen.Recreate();
120 } 153 }
121 154 rasterizer.TickFrame();
122 rasterizer->TickFrame();
123 } 155 }
124 156
125 render_window.OnFrameDisplayed(); 157 render_window.OnFrameDisplayed();
126} 158}
127 159
128bool RendererVulkan::Init() try {
129 library = OpenLibrary();
130 instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
131 true, Settings::values.renderer_debug);
132 if (Settings::values.renderer_debug) {
133 debug_callback = CreateDebugCallback(instance);
134 }
135 surface = CreateSurface(instance, render_window);
136
137 InitializeDevice();
138 Report();
139
140 memory_allocator = std::make_unique<MemoryAllocator>(*device);
141
142 state_tracker = std::make_unique<StateTracker>(gpu);
143
144 scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
145
146 const auto& framebuffer = render_window.GetFramebufferLayout();
147 swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
148 swapchain->Create(framebuffer.width, framebuffer.height, false);
149
150 rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
151 cpu_memory, screen_info, *device,
152 *memory_allocator, *state_tracker, *scheduler);
153
154 blit_screen =
155 std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
156 *memory_allocator, *swapchain, *scheduler, screen_info);
157 return true;
158
159} catch (const vk::Exception& exception) {
160 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
161 return false;
162}
163
164void RendererVulkan::ShutDown() {
165 if (!device) {
166 return;
167 }
168 if (const auto& dev = device->GetLogical()) {
169 dev.WaitIdle();
170 }
171 rasterizer.reset();
172 blit_screen.reset();
173 scheduler.reset();
174 swapchain.reset();
175 memory_allocator.reset();
176 device.reset();
177}
178
179void RendererVulkan::InitializeDevice() {
180 const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
181 const s32 device_index = Settings::values.vulkan_device.GetValue();
182 if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
183 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
184 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
185 }
186 const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
187 device = std::make_unique<Device>(*instance, physical_device, *surface, dld);
188}
189
190void RendererVulkan::Report() const { 160void RendererVulkan::Report() const {
191 const std::string vendor_name{device->GetVendorName()}; 161 const std::string vendor_name{device.GetVendorName()};
192 const std::string model_name{device->GetModelName()}; 162 const std::string model_name{device.GetModelName()};
193 const std::string driver_version = GetDriverVersion(*device); 163 const std::string driver_version = GetDriverVersion(device);
194 const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); 164 const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
195 165
196 const std::string api_version = GetReadableVersion(device->ApiVersion()); 166 const std::string api_version = GetReadableVersion(device.ApiVersion());
197 167
198 const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions()); 168 const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
199 169
200 LOG_INFO(Render_Vulkan, "Driver: {}", driver_name); 170 LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
201 LOG_INFO(Render_Vulkan, "Device: {}", model_name); 171 LOG_INFO(Render_Vulkan, "Device: {}", model_name);
@@ -209,21 +179,4 @@ void RendererVulkan::Report() const {
209 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); 179 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
210} 180}
211 181
212std::vector<std::string> RendererVulkan::EnumerateDevices() try {
213 vk::InstanceDispatch dld;
214 const Common::DynamicLibrary library = OpenLibrary();
215 const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
216 const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
217 std::vector<std::string> names;
218 names.reserve(physical_devices.size());
219 for (const VkPhysicalDevice device : physical_devices) {
220 names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
221 }
222 return names;
223
224} catch (const vk::Exception& exception) {
225 LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what());
226 return {};
227}
228
229} // namespace Vulkan 182} // namespace Vulkan
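
The constructor above replaces the old Init()/ShutDown() pair with a function-try-block, so a vk::Exception thrown while any member initializer runs is translated into a std::runtime_error before it leaves the constructor. A minimal standalone sketch of that pattern follows; Renderer, Instance and VulkanError are illustrative stand-ins, not the yuzu types.

#include <iostream>
#include <stdexcept>
#include <string>

// Illustrative stand-ins for the Vulkan wrappers used above, not yuzu types.
struct VulkanError : std::runtime_error {
    using std::runtime_error::runtime_error;
};

struct Instance {
    explicit Instance(bool fail) {
        if (fail) {
            throw VulkanError("no suitable physical device");
        }
    }
};

class Renderer {
public:
    // Function-try-block: exceptions thrown by the member initializers land in the
    // handler below, where they are translated before leaving the constructor.
    explicit Renderer(bool fail) try : instance(fail) {
        // Body runs only if every member initialized successfully.
    } catch (const VulkanError& exception) {
        // Members that were fully constructed are destroyed before this handler runs,
        // and a constructor handler always rethrows, so the translated error is the
        // only thing a caller can observe.
        throw std::runtime_error(std::string{"Vulkan initialization error "} + exception.what());
    }

private:
    Instance instance;
};

int main() {
    try {
        Renderer renderer{true};
    } catch (const std::runtime_error& error) {
        std::cerr << error.what() << '\n';  // Vulkan initialization error no suitable physical device
    }
}

Because the handler cannot swallow the exception, a half-initialized renderer can never be handed back to the caller, which is what makes the two-phase Init() obsolete.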
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index daf55b9b4..72071316c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -9,8 +9,14 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/dynamic_library.h" 11#include "common/dynamic_library.h"
12
13#include "video_core/renderer_base.h" 12#include "video_core/renderer_base.h"
13#include "video_core/renderer_vulkan/vk_blit_screen.h"
14#include "video_core/renderer_vulkan/vk_rasterizer.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_state_tracker.h"
17#include "video_core/renderer_vulkan/vk_swapchain.h"
18#include "video_core/vulkan_common/vulkan_device.h"
19#include "video_core/vulkan_common/vulkan_memory_allocator.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 20#include "video_core/vulkan_common/vulkan_wrapper.h"
15 21
16namespace Core { 22namespace Core {
@@ -27,20 +33,6 @@ class GPU;
27 33
28namespace Vulkan { 34namespace Vulkan {
29 35
30class Device;
31class StateTracker;
32class MemoryAllocator;
33class VKBlitScreen;
34class VKSwapchain;
35class VKScheduler;
36
37struct VKScreenInfo {
38 VkImageView image_view{};
39 u32 width{};
40 u32 height{};
41 bool is_srgb{};
42};
43
44class RendererVulkan final : public VideoCore::RendererBase { 36class RendererVulkan final : public VideoCore::RendererBase {
45public: 37public:
46 explicit RendererVulkan(Core::TelemetrySession& telemtry_session, 38 explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
@@ -49,15 +41,13 @@ public:
49 std::unique_ptr<Core::Frontend::GraphicsContext> context_); 41 std::unique_ptr<Core::Frontend::GraphicsContext> context_);
50 ~RendererVulkan() override; 42 ~RendererVulkan() override;
51 43
52 bool Init() override;
53 void ShutDown() override;
54 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 44 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
55 45
56 static std::vector<std::string> EnumerateDevices(); 46 VideoCore::RasterizerInterface* ReadRasterizer() override {
47 return &rasterizer;
48 }
57 49
58private: 50private:
59 void InitializeDevice();
60
61 void Report() const; 51 void Report() const;
62 52
63 Core::TelemetrySession& telemetry_session; 53 Core::TelemetrySession& telemetry_session;
@@ -68,18 +58,18 @@ private:
68 vk::InstanceDispatch dld; 58 vk::InstanceDispatch dld;
69 59
70 vk::Instance instance; 60 vk::Instance instance;
71 61 vk::DebugUtilsMessenger debug_callback;
72 vk::SurfaceKHR surface; 62 vk::SurfaceKHR surface;
73 63
74 VKScreenInfo screen_info; 64 VKScreenInfo screen_info;
75 65
76 vk::DebugUtilsMessenger debug_callback; 66 Device device;
77 std::unique_ptr<Device> device; 67 MemoryAllocator memory_allocator;
78 std::unique_ptr<MemoryAllocator> memory_allocator; 68 StateTracker state_tracker;
79 std::unique_ptr<StateTracker> state_tracker; 69 VKScheduler scheduler;
80 std::unique_ptr<VKScheduler> scheduler; 70 VKSwapchain swapchain;
81 std::unique_ptr<VKSwapchain> swapchain; 71 VKBlitScreen blit_screen;
82 std::unique_ptr<VKBlitScreen> blit_screen; 72 RasterizerVulkan rasterizer;
83}; 73};
84 74
85} // namespace Vulkan 75} // namespace Vulkan
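
With the std::unique_ptr members gone, the header now holds the Vulkan objects by value, and their construction order is fixed by declaration order: device before memory_allocator, scheduler before swapchain, and so on. A small sketch of why that ordering matters, using two illustrative types rather than the real classes:

#include <iostream>

// Illustrative types only, not the classes declared above.
struct Device {
    Device() { std::cout << "Device\n"; }
};

struct Scheduler {
    // Holding a reference makes the dependency explicit: the Device member must
    // already exist when the Scheduler is constructed.
    explicit Scheduler(const Device& device_) : device(device_) { std::cout << "Scheduler\n"; }
    const Device& device;
};

class Renderer {
public:
    Renderer() : device(), scheduler(device) {}

private:
    // Members are constructed top to bottom in declaration order (and destroyed in
    // reverse), so every member must be declared after the members it depends on.
    Device device;
    Scheduler scheduler;
};

int main() {
    Renderer renderer;  // Prints "Device" then "Scheduler".
}

Destruction runs in the reverse order, so the rasterizer declared last is torn down first, before the scheduler and device it depends on.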
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 3e3b895e0..a1a32aabe 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -18,7 +18,6 @@
18#include "video_core/gpu.h" 18#include "video_core/gpu.h"
19#include "video_core/host_shaders/vulkan_present_frag_spv.h" 19#include "video_core/host_shaders/vulkan_present_frag_spv.h"
20#include "video_core/host_shaders/vulkan_present_vert_spv.h" 20#include "video_core/host_shaders/vulkan_present_vert_spv.h"
21#include "video_core/rasterizer_interface.h"
22#include "video_core/renderer_vulkan/renderer_vulkan.h" 21#include "video_core/renderer_vulkan/renderer_vulkan.h"
23#include "video_core/renderer_vulkan/vk_blit_screen.h" 22#include "video_core/renderer_vulkan/vk_blit_screen.h"
24#include "video_core/renderer_vulkan/vk_master_semaphore.h" 23#include "video_core/renderer_vulkan/vk_master_semaphore.h"
@@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData {
113}; 112};
114 113
115VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, 114VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
116 Core::Frontend::EmuWindow& render_window_, 115 Core::Frontend::EmuWindow& render_window_, const Device& device_,
117 VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
118 MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_, 116 MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
119 VKScheduler& scheduler_, const VKScreenInfo& screen_info_) 117 VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
120 : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, 118 : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
121 device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_}, 119 memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
122 scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { 120 image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
123 resource_ticks.resize(image_count); 121 resource_ticks.resize(image_count);
124 122
125 CreateStaticResources(); 123 CreateStaticResources();
@@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
150 SetUniformData(data, framebuffer); 148 SetUniformData(data, framebuffer);
151 SetVertexData(data, framebuffer); 149 SetVertexData(data, framebuffer);
152 150
153 const std::span<u8> map = buffer_commit.Map(); 151 const std::span<u8> mapped_span = buffer_commit.Map();
154 std::memcpy(map.data(), &data, sizeof(data)); 152 std::memcpy(mapped_span.data(), &data, sizeof(data));
155 153
156 if (!use_accelerated) { 154 if (!use_accelerated) {
157 const u64 image_offset = GetRawImageOffset(framebuffer, image_index); 155 const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
@@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
159 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; 157 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
160 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); 158 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
161 const size_t size_bytes = GetSizeInBytes(framebuffer); 159 const size_t size_bytes = GetSizeInBytes(framebuffer);
162 rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
163 160
164 // TODO(Rodrigo): Read this from HLE 161 // TODO(Rodrigo): Read this from HLE
165 constexpr u32 block_height_log2 = 4; 162 constexpr u32 block_height_log2 = 4;
166 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); 163 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
167 Tegra::Texture::UnswizzleTexture( 164 Tegra::Texture::UnswizzleTexture(
168 map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, 165 mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
169 framebuffer.width, framebuffer.height, 1, block_height_log2, 0); 166 bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
170 167
171 const VkBufferImageCopy copy{ 168 const VkBufferImageCopy copy{
172 .bufferOffset = image_offset, 169 .bufferOffset = image_offset,
@@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
266 cmdbuf.Draw(4, 1, 0, 0); 263 cmdbuf.Draw(4, 1, 0, 0);
267 cmdbuf.EndRenderPass(); 264 cmdbuf.EndRenderPass();
268 }); 265 });
269
270 return *semaphores[image_index]; 266 return *semaphores[image_index];
271} 267}
272 268
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index b52576957..5e3177685 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -38,12 +38,18 @@ class RasterizerVulkan;
38class VKScheduler; 38class VKScheduler;
39class VKSwapchain; 39class VKSwapchain;
40 40
41class VKBlitScreen final { 41struct VKScreenInfo {
42 VkImageView image_view{};
43 u32 width{};
44 u32 height{};
45 bool is_srgb{};
46};
47
48class VKBlitScreen {
42public: 49public:
43 explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, 50 explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
44 Core::Frontend::EmuWindow& render_window, 51 Core::Frontend::EmuWindow& render_window, const Device& device,
45 VideoCore::RasterizerInterface& rasterizer, const Device& device, 52 MemoryAllocator& memory_manager, VKSwapchain& swapchain,
46 MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
47 VKScheduler& scheduler, const VKScreenInfo& screen_info); 53 VKScheduler& scheduler, const VKScreenInfo& screen_info);
48 ~VKBlitScreen(); 54 ~VKBlitScreen();
49 55
@@ -84,7 +90,6 @@ private:
84 90
85 Core::Memory::Memory& cpu_memory; 91 Core::Memory::Memory& cpu_memory;
86 Core::Frontend::EmuWindow& render_window; 92 Core::Frontend::EmuWindow& render_window;
87 VideoCore::RasterizerInterface& rasterizer;
88 const Device& device; 93 const Device& device;
89 MemoryAllocator& memory_allocator; 94 MemoryAllocator& memory_allocator;
90 VKSwapchain& swapchain; 95 VKSwapchain& swapchain;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d8ad40a0f..848eedd66 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -3,188 +3,308 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array>
6#include <cstring> 7#include <cstring>
7#include <memory> 8#include <span>
9#include <vector>
8 10
9#include "core/core.h"
10#include "video_core/buffer_cache/buffer_cache.h" 11#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_buffer_cache.h" 13#include "video_core/renderer_vulkan/vk_buffer_cache.h"
12#include "video_core/renderer_vulkan/vk_scheduler.h" 14#include "video_core/renderer_vulkan/vk_scheduler.h"
13#include "video_core/renderer_vulkan/vk_stream_buffer.h" 15#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
16#include "video_core/renderer_vulkan/vk_update_descriptor.h"
14#include "video_core/vulkan_common/vulkan_device.h" 17#include "video_core/vulkan_common/vulkan_device.h"
18#include "video_core/vulkan_common/vulkan_memory_allocator.h"
15#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
16 20
17namespace Vulkan { 21namespace Vulkan {
18
19namespace { 22namespace {
23VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
24 return VkBufferCopy{
25 .srcOffset = copy.src_offset,
26 .dstOffset = copy.dst_offset,
27 .size = copy.size,
28 };
29}
20 30
21constexpr VkBufferUsageFlags BUFFER_USAGE = 31VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
22 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | 32 if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
23 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; 33 return VK_INDEX_TYPE_UINT8_EXT;
24 34 }
25constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = 35 if (num_elements <= 0xffff) {
26 VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | 36 return VK_INDEX_TYPE_UINT16;
27 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 37 }
28 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; 38 return VK_INDEX_TYPE_UINT32;
29 39}
30constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
31 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
32 VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
33 40
34constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = 41size_t BytesPerIndex(VkIndexType index_type) {
35 VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; 42 switch (index_type) {
43 case VK_INDEX_TYPE_UINT8_EXT:
44 return 1;
45 case VK_INDEX_TYPE_UINT16:
46 return 2;
47 case VK_INDEX_TYPE_UINT32:
48 return 4;
49 default:
50 UNREACHABLE_MSG("Invalid index type={}", index_type);
51 return 1;
52 }
53}
36 54
55template <typename T>
56std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
57 std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
58 std::ranges::transform(indices, indices.begin(),
59 [quad, first](u32 index) { return first + index + quad * 4; });
60 return indices;
61}
37} // Anonymous namespace 62} // Anonymous namespace
38 63
39Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_, 64Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
40 StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) 65 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
41 : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ 66
42 staging_pool_} { 67Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
43 buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ 68 VAddr cpu_addr_, u64 size_bytes_)
69 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
70 buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
44 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 71 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
45 .pNext = nullptr, 72 .pNext = nullptr,
46 .flags = 0, 73 .flags = 0,
47 .size = static_cast<VkDeviceSize>(size_), 74 .size = SizeBytes(),
48 .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 75 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
76 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
77 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
78 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
79 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
49 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 80 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
50 .queueFamilyIndexCount = 0, 81 .queueFamilyIndexCount = 0,
51 .pQueueFamilyIndices = nullptr, 82 .pQueueFamilyIndices = nullptr,
52 }); 83 });
53 commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); 84 if (runtime.device.HasDebuggingToolAttached()) {
85 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
86 }
87 commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
54} 88}
55 89
56Buffer::~Buffer() = default; 90BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
91 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
92 VKUpdateDescriptorQueue& update_descriptor_queue_,
93 VKDescriptorPool& descriptor_pool)
94 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
95 staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
96 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
97 quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
57 98
58void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { 99StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
59 const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload); 100 return staging_pool.Request(size, MemoryUsage::Upload);
60 std::memcpy(staging.mapped_span.data(), data, data_size); 101}
61 102
62 scheduler.RequestOutsideRenderPassOperationContext(); 103StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
104 return staging_pool.Request(size, MemoryUsage::Download);
105}
63 106
64 const VkBuffer handle = Handle(); 107void BufferCacheRuntime::Finish() {
65 scheduler.Record([staging = staging.buffer, handle, offset, data_size, 108 scheduler.Finish();
66 &device = device](vk::CommandBuffer cmdbuf) { 109}
67 const VkBufferMemoryBarrier read_barrier{ 110
68 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 111void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
69 .pNext = nullptr, 112 std::span<const VideoCommon::BufferCopy> copies) {
70 .srcAccessMask = 113 static constexpr VkMemoryBarrier READ_BARRIER{
71 VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | 114 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
72 VK_ACCESS_HOST_WRITE_BIT | 115 .pNext = nullptr,
73 (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), 116 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
74 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, 117 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
75 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 118 };
76 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 119 static constexpr VkMemoryBarrier WRITE_BARRIER{
77 .buffer = handle, 120 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
78 .offset = offset, 121 .pNext = nullptr,
79 .size = data_size, 122 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
80 }; 123 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
81 const VkBufferMemoryBarrier write_barrier{ 124 };
82 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 125 // Measuring a popular game, this number never exceeds the specified size once data is warmed up
83 .pNext = nullptr, 126 boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
84 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 127 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
85 .dstAccessMask = UPLOAD_ACCESS_BARRIERS, 128 scheduler.RequestOutsideRenderPassOperationContext();
86 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 129 scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
87 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
88 .buffer = handle,
89 .offset = offset,
90 .size = data_size,
91 };
92 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 130 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
93 0, read_barrier); 131 0, READ_BARRIER);
94 cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); 132 cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
95 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, 133 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
96 write_barrier); 134 0, WRITE_BARRIER);
97 }); 135 });
98} 136}
99 137
100void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { 138void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
101 auto staging = staging_pool.Request(data_size, MemoryUsage::Download); 139 u32 base_vertex, u32 num_indices, VkBuffer buffer,
102 scheduler.RequestOutsideRenderPassOperationContext(); 140 u32 offset, [[maybe_unused]] u32 size) {
141 VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
142 VkDeviceSize vk_offset = offset;
143 VkBuffer vk_buffer = buffer;
144 if (topology == PrimitiveTopology::Quads) {
145 vk_index_type = VK_INDEX_TYPE_UINT32;
146 std::tie(vk_buffer, vk_offset) =
147 quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
148 } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
149 vk_index_type = VK_INDEX_TYPE_UINT16;
150 std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
151 }
152 if (vk_buffer == VK_NULL_HANDLE) {
153 // Vulkan doesn't support null index buffers. Replace it with our own null buffer.
154 ReserveNullIndexBuffer();
155 vk_buffer = *null_index_buffer;
156 }
157 scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
158 cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
159 });
160}
103 161
104 const VkBuffer handle = Handle(); 162void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
105 scheduler.Record( 163 ReserveQuadArrayLUT(first + count, true);
106 [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
107 const VkBufferMemoryBarrier barrier{
108 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
109 .pNext = nullptr,
110 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
111 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
112 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
113 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
114 .buffer = handle,
115 .offset = offset,
116 .size = data_size,
117 };
118
119 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
120 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
121 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
122 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
123 cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
124 });
125 scheduler.Finish();
126 164
127 std::memcpy(data, staging.mapped_span.data(), data_size); 165 // The LUT has the indices 0, 1, 2, and 3 copied as an array
166 // To apply these 'first' offsets we can apply an offset based on the modulus.
167 const VkIndexType index_type = quad_array_lut_index_type;
168 const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
169 const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
170 scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
171 cmdbuf.BindIndexBuffer(buffer, offset, index_type);
172 });
128} 173}
129 174
130void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 175void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
131 std::size_t copy_size) { 176 u32 stride) {
132 scheduler.RequestOutsideRenderPassOperationContext(); 177 if (device.IsExtExtendedDynamicStateSupported()) {
178 scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
179 const VkDeviceSize vk_offset = offset;
180 const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
181 const VkDeviceSize vk_stride = stride;
182 cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
183 });
184 } else {
185 scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
186 cmdbuf.BindVertexBuffer(index, buffer, offset);
187 });
188 }
189}
133 190
134 const VkBuffer dst_buffer = Handle(); 191void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
135 scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, 192 u32 size) {
136 copy_size](vk::CommandBuffer cmdbuf) { 193 if (!device.IsExtTransformFeedbackSupported()) {
137 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size}); 194 // Already logged in the rasterizer
138 195 return;
139 std::array<VkBufferMemoryBarrier, 2> barriers; 196 }
140 barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 197 scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
141 barriers[0].pNext = nullptr; 198 const VkDeviceSize vk_offset = offset;
142 barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; 199 const VkDeviceSize vk_size = size;
143 barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; 200 cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
144 barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
145 barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
146 barriers[0].buffer = src_buffer;
147 barriers[0].offset = src_offset;
148 barriers[0].size = copy_size;
149 barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
150 barriers[1].pNext = nullptr;
151 barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
152 barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
153 barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
154 barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
155 barriers[1].buffer = dst_buffer;
156 barriers[1].offset = dst_offset;
157 barriers[1].size = copy_size;
158 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
159 barriers, {});
160 }); 201 });
161} 202}
162 203
163VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, 204void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
164 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 205 update_descriptor_queue.AddBuffer(buffer, offset, size);
165 const Device& device_, MemoryAllocator& memory_allocator_, 206}
166 VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
167 StagingBufferPool& staging_pool_)
168 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
169 cpu_memory_, stream_buffer_},
170 device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
171 staging_pool{staging_pool_} {}
172 207
173VKBufferCache::~VKBufferCache() = default; 208void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
209 if (num_indices <= current_num_indices) {
210 return;
211 }
212 if (wait_for_idle) {
213 scheduler.Finish();
214 }
215 current_num_indices = num_indices;
216 quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
174 217
175std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { 218 const u32 num_quads = num_indices / 4;
176 return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr, 219 const u32 num_triangle_indices = num_quads * 6;
177 size); 220 const u32 num_first_offset_copies = 4;
221 const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
222 const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
223 quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
224 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
225 .pNext = nullptr,
226 .flags = 0,
227 .size = size_bytes,
228 .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
229 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
230 .queueFamilyIndexCount = 0,
231 .pQueueFamilyIndices = nullptr,
232 });
233 if (device.HasDebuggingToolAttached()) {
234 quad_array_lut.SetObjectNameEXT("Quad LUT");
235 }
236 quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
237
238 const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
239 u8* staging_data = staging.mapped_span.data();
240 const size_t quad_size = bytes_per_index * 6;
241 for (u32 first = 0; first < num_first_offset_copies; ++first) {
242 for (u32 quad = 0; quad < num_quads; ++quad) {
243 switch (quad_array_lut_index_type) {
244 case VK_INDEX_TYPE_UINT8_EXT:
245 std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
246 break;
247 case VK_INDEX_TYPE_UINT16:
248 std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
249 break;
250 case VK_INDEX_TYPE_UINT32:
251 std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
252 break;
253 default:
254 UNREACHABLE();
255 break;
256 }
257 staging_data += quad_size;
258 }
259 }
260 scheduler.RequestOutsideRenderPassOperationContext();
261 scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
262 dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
263 const VkBufferCopy copy{
264 .srcOffset = src_offset,
265 .dstOffset = 0,
266 .size = size_bytes,
267 };
268 const VkBufferMemoryBarrier write_barrier{
269 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
270 .pNext = nullptr,
271 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
272 .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
273 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
274 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
275 .buffer = dst_buffer,
276 .offset = 0,
277 .size = size_bytes,
278 };
279 cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
280 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
281 0, write_barrier);
282 });
178} 283}
179 284
180VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { 285void BufferCacheRuntime::ReserveNullIndexBuffer() {
181 size = std::max(size, std::size_t(4)); 286 if (null_index_buffer) {
182 const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal); 287 return;
288 }
289 null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
290 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
291 .pNext = nullptr,
292 .flags = 0,
293 .size = 4,
294 .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
295 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
296 .queueFamilyIndexCount = 0,
297 .pQueueFamilyIndices = nullptr,
298 });
299 if (device.HasDebuggingToolAttached()) {
300 null_index_buffer.SetObjectNameEXT("Null index buffer");
301 }
302 null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal);
303
183 scheduler.RequestOutsideRenderPassOperationContext(); 304 scheduler.RequestOutsideRenderPassOperationContext();
184 scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) { 305 scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) {
185 cmdbuf.FillBuffer(buffer, 0, size, 0); 306 cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
186 }); 307 });
187 return {empty.buffer, 0, 0};
188} 308}
189 309
190} // namespace Vulkan 310} // namespace Vulkan
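
ReserveQuadArrayLUT builds its lookup table from MakeQuadIndices: each quad of four vertices expands into two triangles that reuse the 0-1-2 / 0-2-3 winding, and the table is written four times so that a 'first' offset that is not a multiple of four can be handled by the modulus trick in BindQuadArrayIndexBuffer. Below is a self-contained sketch of the expansion itself; MakeQuadIndices mirrors the helper in the diff and main is purely illustrative.

#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>
#include <vector>

// One quad becomes two triangles sharing the 0-1-2 / 0-2-3 winding, shifted by the
// quad index and a first-vertex offset.
template <typename T>
std::array<T, 6> MakeQuadIndices(std::uint32_t quad, std::uint32_t first) {
    std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
    std::ranges::transform(indices, indices.begin(), [quad, first](std::uint32_t index) {
        return static_cast<T>(first + index + quad * 4);
    });
    return indices;
}

int main() {
    // Two quads with no base offset: eight source vertices become twelve triangle indices.
    std::vector<std::uint32_t> lut;
    for (std::uint32_t quad = 0; quad < 2; ++quad) {
        const auto quad_indices = MakeQuadIndices<std::uint32_t>(quad, 0);
        lut.insert(lut.end(), quad_indices.begin(), quad_indices.end());
    }
    for (const std::uint32_t index : lut) {
        std::cout << index << ' ';
    }
    std::cout << '\n';  // 0 1 2 0 2 3 4 5 6 4 6 7
}

For two quads the output is 0 1 2 0 2 3 4 5 6 4 6 7, i.e. six indices per quad, which matches the num_triangle_indices = num_quads * 6 sizing used when allocating the LUT above.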
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 41d577510..041e6515c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -4,69 +4,124 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
8
9#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h" 7#include "video_core/buffer_cache/buffer_cache.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/vk_compute_pass.h"
11#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
12#include "video_core/renderer_vulkan/vk_stream_buffer.h"
13#include "video_core/vulkan_common/vulkan_memory_allocator.h" 11#include "video_core/vulkan_common/vulkan_memory_allocator.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 12#include "video_core/vulkan_common/vulkan_wrapper.h"
15 13
16namespace Vulkan { 14namespace Vulkan {
17 15
18class Device; 16class Device;
17class VKDescriptorPool;
19class VKScheduler; 18class VKScheduler;
19class VKUpdateDescriptorQueue;
20 20
21class Buffer final : public VideoCommon::BufferBlock { 21class BufferCacheRuntime;
22public:
23 explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
24 StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
25 ~Buffer();
26
27 void Upload(std::size_t offset, std::size_t data_size, const u8* data);
28 22
29 void Download(std::size_t offset, std::size_t data_size, u8* data); 23class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
30 24public:
31 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 25 explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
32 std::size_t copy_size); 26 explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
27 VAddr cpu_addr_, u64 size_bytes_);
33 28
34 VkBuffer Handle() const { 29 [[nodiscard]] VkBuffer Handle() const noexcept {
35 return *buffer; 30 return *buffer;
36 } 31 }
37 32
38 u64 Address() const { 33 operator VkBuffer() const noexcept {
39 return 0; 34 return *buffer;
40 } 35 }
41 36
42private: 37private:
43 const Device& device;
44 VKScheduler& scheduler;
45 StagingBufferPool& staging_pool;
46
47 vk::Buffer buffer; 38 vk::Buffer buffer;
48 MemoryCommit commit; 39 MemoryCommit commit;
49}; 40};
50 41
51class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { 42class BufferCacheRuntime {
43 friend Buffer;
44
45 using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
46 using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
47
52public: 48public:
53 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, 49 explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
54 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, 50 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
55 const Device& device, MemoryAllocator& memory_allocator, 51 VKUpdateDescriptorQueue& update_descriptor_queue_,
56 VKScheduler& scheduler, VKStreamBuffer& stream_buffer, 52 VKDescriptorPool& descriptor_pool);
57 StagingBufferPool& staging_pool); 53
58 ~VKBufferCache(); 54 void Finish();
55
56 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
57
58 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
59
60 void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
61 std::span<const VideoCommon::BufferCopy> copies);
62
63 void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
64 u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
59 65
60 BufferInfo GetEmptyBuffer(std::size_t size) override; 66 void BindQuadArrayIndexBuffer(u32 first, u32 count);
61 67
62protected: 68 void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
63 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; 69
70 void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
71
72 std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
73 [[maybe_unused]] u32 binding_index, u32 size) {
74 const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
75 BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
76 return ref.mapped_span;
77 }
78
79 void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
80 BindBuffer(buffer, offset, size);
81 }
82
83 void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
84 [[maybe_unused]] bool is_written) {
85 BindBuffer(buffer, offset, size);
86 }
64 87
65private: 88private:
89 void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
90
91 void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
92
93 void ReserveNullIndexBuffer();
94
66 const Device& device; 95 const Device& device;
67 MemoryAllocator& memory_allocator; 96 MemoryAllocator& memory_allocator;
68 VKScheduler& scheduler; 97 VKScheduler& scheduler;
69 StagingBufferPool& staging_pool; 98 StagingBufferPool& staging_pool;
99 VKUpdateDescriptorQueue& update_descriptor_queue;
100
101 vk::Buffer quad_array_lut;
102 MemoryCommit quad_array_lut_commit;
103 VkIndexType quad_array_lut_index_type{};
104 u32 current_num_indices = 0;
105
106 vk::Buffer null_index_buffer;
107 MemoryCommit null_index_buffer_commit;
108
109 Uint8Pass uint8_pass;
110 QuadIndexedPass quad_index_pass;
70}; 111};
71 112
113struct BufferCacheParams {
114 using Runtime = Vulkan::BufferCacheRuntime;
115 using Buffer = Vulkan::Buffer;
116
117 static constexpr bool IS_OPENGL = false;
118 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
119 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
120 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
121 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
122 static constexpr bool USE_MEMORY_MAPS = true;
123};
124
125using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
126
72} // namespace Vulkan 127} // namespace Vulkan
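
BufferCacheParams follows the policy-struct idiom: the Vulkan backend exposes its Buffer and Runtime types plus a handful of compile-time flags, and the shared VideoCommon::BufferCache template adapts its behaviour to them. The generic sketch below shows only the shape of that idiom; GenericCache, NullParams and NullBuffer are hypothetical names, not the VideoCommon API.

#include <string_view>

// Hypothetical backend description, analogous in shape to BufferCacheParams.
struct NullBuffer {
    static constexpr std::string_view name = "null";
};

struct NullParams {
    using Buffer = NullBuffer;
    static constexpr bool USE_MEMORY_MAPS = false;
};

// Hypothetical shared cache, analogous in shape to a templated common buffer cache.
template <class Params>
class GenericCache {
public:
    static constexpr std::string_view BackendName() {
        return Params::Buffer::name;
    }

    static constexpr bool UsesMemoryMaps() {
        // The flag is a compile-time constant, so each instantiation keeps only one branch.
        if constexpr (Params::USE_MEMORY_MAPS) {
            return true;   // Upload through persistently mapped staging memory.
        } else {
            return false;  // Fall back to immediate copies.
        }
    }
};

using NullCache = GenericCache<NullParams>;

static_assert(NullCache::BackendName() == "null");
static_assert(!NullCache::UsesMemoryMaps());

Because the flags are constexpr, branches such as the USE_MEMORY_MAPS check compile away entirely in each instantiation, so the OpenGL and Vulkan backends can share one cache implementation without runtime dispatch.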
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 5eb6a54be..2f9a7b028 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,7 +10,7 @@
10#include "common/alignment.h" 10#include "common/alignment.h"
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" 13#include "common/div_ceil.h"
14#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" 14#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
15#include "video_core/host_shaders/vulkan_uint8_comp_spv.h" 15#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
16#include "video_core/renderer_vulkan/vk_compute_pass.h" 16#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -22,30 +22,7 @@
22#include "video_core/vulkan_common/vulkan_wrapper.h" 22#include "video_core/vulkan_common/vulkan_wrapper.h"
23 23
24namespace Vulkan { 24namespace Vulkan {
25
26namespace { 25namespace {
27
28VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
29 return {
30 .binding = 0,
31 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
32 .descriptorCount = 1,
33 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
34 .pImmutableSamplers = nullptr,
35 };
36}
37
38VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
39 return {
40 .dstBinding = 0,
41 .dstArrayElement = 0,
42 .descriptorCount = 1,
43 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
44 .offset = 0,
45 .stride = sizeof(DescriptorUpdateEntry),
46 };
47}
48
49VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { 26VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
50 return { 27 return {
51 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 28 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -162,55 +139,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
162 return set; 139 return set;
163} 140}
164 141
165QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
166 VKDescriptorPool& descriptor_pool_,
167 StagingBufferPool& staging_buffer_pool_,
168 VKUpdateDescriptorQueue& update_descriptor_queue_)
169 : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
170 BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
171 BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
172 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
173 update_descriptor_queue{update_descriptor_queue_} {}
174
175QuadArrayPass::~QuadArrayPass() = default;
176
177std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
178 const u32 num_triangle_vertices = (num_vertices / 4) * 6;
179 const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
180 const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
181
182 update_descriptor_queue.Acquire();
183 update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
184 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
185
186 scheduler.RequestOutsideRenderPassOperationContext();
187
188 ASSERT(num_vertices % 4 == 0);
189 const u32 num_quads = num_vertices / 4;
190 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer,
191 num_quads, first, set](vk::CommandBuffer cmdbuf) {
192 constexpr u32 dispatch_size = 1024;
193 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
194 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
195 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
196 cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
197
198 VkBufferMemoryBarrier barrier;
199 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
200 barrier.pNext = nullptr;
201 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
202 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
203 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
204 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
205 barrier.buffer = buffer;
206 barrier.offset = 0;
207 barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
208 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
209 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
210 });
211 return {staging_ref.buffer, 0};
212}
213
214Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, 142Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
215 VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, 143 VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
216 VKUpdateDescriptorQueue& update_descriptor_queue_) 144 VKUpdateDescriptorQueue& update_descriptor_queue_)
@@ -221,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
221 149
222Uint8Pass::~Uint8Pass() = default; 150Uint8Pass::~Uint8Pass() = default;
223 151
224std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, 152std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
225 u64 src_offset) { 153 u32 src_offset) {
226 const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); 154 const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
227 const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); 155 const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
228 156
229 update_descriptor_queue.Acquire(); 157 update_descriptor_queue.Acquire();
230 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); 158 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
231 update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); 159 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
232 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 160 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
233 161
234 scheduler.RequestOutsideRenderPassOperationContext(); 162 scheduler.RequestOutsideRenderPassOperationContext();
235 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, 163 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
236 num_vertices](vk::CommandBuffer cmdbuf) { 164 num_vertices](vk::CommandBuffer cmdbuf) {
237 constexpr u32 dispatch_size = 1024; 165 static constexpr u32 DISPATCH_SIZE = 1024;
166 static constexpr VkMemoryBarrier WRITE_BARRIER{
167 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
168 .pNext = nullptr,
169 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
170 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
171 };
238 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 172 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
239 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 173 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
240 cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); 174 cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
241
242 VkBufferMemoryBarrier barrier;
243 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
244 barrier.pNext = nullptr;
245 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
246 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
247 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
248 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
249 barrier.buffer = buffer;
250 barrier.offset = 0;
251 barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
252 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 175 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
253 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); 176 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
254 }); 177 });
255 return {staging_ref.buffer, 0}; 178 return {staging.buffer, staging.offset};
256} 179}
257 180
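Two changes in the rewritten Uint8Pass::Assemble above: the per-buffer VkBufferMemoryBarrier becomes a global VkMemoryBarrier guarding the shader write before vertex input, and the workgroup count is computed with Common::DivCeil instead of AlignUp followed by a division. A minimal sketch of the ceiling division, assuming unsigned operands (the real Common::DivCeil may be a template with a different signature):

    #include <cstdint>

    // Ceiling division: smallest x such that x * den >= num, for den > 0.
    // AlignUp(num, den) / den computes the same value.
    constexpr uint32_t DivCeil(uint32_t num, uint32_t den) {
        return (num + den - 1) / den;
    }
    static_assert(DivCeil(1024, 1024) == 1);
    static_assert(DivCeil(1025, 1024) == 2);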
258QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, 181QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@@ -267,9 +190,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
267 190
268QuadIndexedPass::~QuadIndexedPass() = default; 191QuadIndexedPass::~QuadIndexedPass() = default;
269 192
270std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( 193std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
271 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, 194 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
272 VkBuffer src_buffer, u64 src_offset) { 195 VkBuffer src_buffer, u32 src_offset) {
273 const u32 index_shift = [index_format] { 196 const u32 index_shift = [index_format] {
274 switch (index_format) { 197 switch (index_format) {
275 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: 198 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -286,38 +209,33 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
286 const u32 num_tri_vertices = (num_vertices / 4) * 6; 209 const u32 num_tri_vertices = (num_vertices / 4) * 6;
287 210
288 const std::size_t staging_size = num_tri_vertices * sizeof(u32); 211 const std::size_t staging_size = num_tri_vertices * sizeof(u32);
289 const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); 212 const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
290 213
291 update_descriptor_queue.Acquire(); 214 update_descriptor_queue.Acquire();
292 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); 215 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
293 update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); 216 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
294 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 217 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
295 218
296 scheduler.RequestOutsideRenderPassOperationContext(); 219 scheduler.RequestOutsideRenderPassOperationContext();
297 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, 220 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
298 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { 221 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
299 static constexpr u32 dispatch_size = 1024; 222 static constexpr u32 DISPATCH_SIZE = 1024;
223 static constexpr VkMemoryBarrier WRITE_BARRIER{
224 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
225 .pNext = nullptr,
226 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
227 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
228 };
300 const std::array push_constants = {base_vertex, index_shift}; 229 const std::array push_constants = {base_vertex, index_shift};
301 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 230 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
302 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 231 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
303 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), 232 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
304 &push_constants); 233 &push_constants);
305 cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); 234 cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
306
307 VkBufferMemoryBarrier barrier;
308 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
309 barrier.pNext = nullptr;
310 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
311 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
312 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
313 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
314 barrier.buffer = buffer;
315 barrier.offset = 0;
316 barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
317 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 235 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
318 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); 236 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
319 }); 237 });
320 return {staging_ref.buffer, 0}; 238 return {staging.buffer, staging.offset};
321} 239}
322 240
323} // namespace Vulkan 241} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index f5c6f5f17..17d781d99 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -41,22 +41,6 @@ private:
41 vk::ShaderModule module; 41 vk::ShaderModule module;
42}; 42};
43 43
44class QuadArrayPass final : public VKComputePass {
45public:
46 explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
47 VKDescriptorPool& descriptor_pool_,
48 StagingBufferPool& staging_buffer_pool_,
49 VKUpdateDescriptorQueue& update_descriptor_queue_);
50 ~QuadArrayPass();
51
52 std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
53
54private:
55 VKScheduler& scheduler;
56 StagingBufferPool& staging_buffer_pool;
57 VKUpdateDescriptorQueue& update_descriptor_queue;
58};
59
60class Uint8Pass final : public VKComputePass { 44class Uint8Pass final : public VKComputePass {
61public: 45public:
62 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, 46 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
@@ -64,7 +48,10 @@ public:
64 VKUpdateDescriptorQueue& update_descriptor_queue_); 48 VKUpdateDescriptorQueue& update_descriptor_queue_);
65 ~Uint8Pass(); 49 ~Uint8Pass();
66 50
67 std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); 51 /// Assemble uint8 indices into an uint16 index buffer
52 /// Returns a pair with the staging buffer, and the offset where the assembled data is
53 std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
54 u32 src_offset);
68 55
69private: 56private:
70 VKScheduler& scheduler; 57 VKScheduler& scheduler;
@@ -80,9 +67,9 @@ public:
80 VKUpdateDescriptorQueue& update_descriptor_queue_); 67 VKUpdateDescriptorQueue& update_descriptor_queue_);
81 ~QuadIndexedPass(); 68 ~QuadIndexedPass();
82 69
83 std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, 70 std::pair<VkBuffer, VkDeviceSize> Assemble(
84 u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, 71 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
85 u64 src_offset); 72 u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
86 73
87private: 74private:
88 VKScheduler& scheduler; 75 VKScheduler& scheduler;
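Per the header changes above, both remaining passes now return std::pair<VkBuffer, VkDeviceSize> and report the staging sub-allocation offset rather than always 0, so callers must bind at that offset. A hedged sketch of the consuming side; the helper name and call site are illustrative, not taken from this diff:

    #include <cstdint>
    #include <utility>
    #include <vulkan/vulkan.h>

    // Illustrative only: the pair carries the staging buffer plus the offset of the
    // assembled data inside it, so callers must not assume the data starts at 0.
    using AssembledIndices = std::pair<VkBuffer, VkDeviceSize>;

    void BindAssembledIndices(VkCommandBuffer cmdbuf, const AssembledIndices& indices) {
        // Assumption: uint8 sources were widened to uint16 by Uint8Pass.
        vkCmdBindIndexBuffer(cmdbuf, indices.first, indices.second, VK_INDEX_TYPE_UINT16);
    }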
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 6cd00884d..3bec48d14 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -45,8 +45,8 @@ void InnerFence::Wait() {
45} 45}
46 46
47VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 47VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
48 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, 48 TextureCache& texture_cache_, BufferCache& buffer_cache_,
49 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, 49 VKQueryCache& query_cache_, const Device& device_,
50 VKScheduler& scheduler_) 50 VKScheduler& scheduler_)
51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, 51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
52 scheduler{scheduler_} {} 52 scheduler{scheduler_} {}
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 9c5e5aa8f..2f8322d29 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -22,7 +22,6 @@ class RasterizerInterface;
22namespace Vulkan { 22namespace Vulkan {
23 23
24class Device; 24class Device;
25class VKBufferCache;
26class VKQueryCache; 25class VKQueryCache;
27class VKScheduler; 26class VKScheduler;
28 27
@@ -45,14 +44,14 @@ private:
45using Fence = std::shared_ptr<InnerFence>; 44using Fence = std::shared_ptr<InnerFence>;
46 45
47using GenericFenceManager = 46using GenericFenceManager =
48 VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>; 47 VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
49 48
50class VKFenceManager final : public GenericFenceManager { 49class VKFenceManager final : public GenericFenceManager {
51public: 50public:
52 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 51 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
53 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, 52 TextureCache& texture_cache, BufferCache& buffer_cache,
54 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, 53 VKQueryCache& query_cache, const Device& device,
55 VKScheduler& scheduler_); 54 VKScheduler& scheduler);
56 55
57protected: 56protected:
58 Fence CreateFence(u32 value, bool is_stubbed) override; 57 Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f0a111829..684d4e3a6 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -8,8 +8,6 @@
8#include <mutex> 8#include <mutex>
9#include <vector> 9#include <vector>
10 10
11#include <boost/container/static_vector.hpp>
12
13#include "common/alignment.h" 11#include "common/alignment.h"
14#include "common/assert.h" 12#include "common/assert.h"
15#include "common/logging/log.h" 13#include "common/logging/log.h"
@@ -24,7 +22,6 @@
24#include "video_core/renderer_vulkan/maxwell_to_vk.h" 22#include "video_core/renderer_vulkan/maxwell_to_vk.h"
25#include "video_core/renderer_vulkan/renderer_vulkan.h" 23#include "video_core/renderer_vulkan/renderer_vulkan.h"
26#include "video_core/renderer_vulkan/vk_buffer_cache.h" 24#include "video_core/renderer_vulkan/vk_buffer_cache.h"
27#include "video_core/renderer_vulkan/vk_compute_pass.h"
28#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 25#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
29#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 26#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
30#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 27#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25
50MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); 47MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
51MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); 48MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
52MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); 49MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
53MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
54MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
55MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
56MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
57MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
58MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
59MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); 50MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
60 51
61namespace { 52namespace {
53struct DrawParams {
54 u32 base_instance;
55 u32 num_instances;
56 u32 base_vertex;
57 u32 num_vertices;
58 bool is_indexed;
59};
62 60
63constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); 61constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
64 62
@@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
67 const float width = src.scale_x * 2.0f; 65 const float width = src.scale_x * 2.0f;
68 const float height = src.scale_y * 2.0f; 66 const float height = src.scale_y * 2.0f;
69 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; 67 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
70
71 VkViewport viewport{ 68 VkViewport viewport{
72 .x = src.translate_x - src.scale_x, 69 .x = src.translate_x - src.scale_x,
73 .y = src.translate_y - src.scale_y, 70 .y = src.translate_y - src.scale_y,
@@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
76 .minDepth = src.translate_z - src.scale_z * reduce_z, 73 .minDepth = src.translate_z - src.scale_z * reduce_z,
77 .maxDepth = src.translate_z + src.scale_z, 74 .maxDepth = src.translate_z + src.scale_z,
78 }; 75 };
79
80 if (!device.IsExtDepthRangeUnrestrictedSupported()) { 76 if (!device.IsExtDepthRangeUnrestrictedSupported()) {
81 viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); 77 viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
82 viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); 78 viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
83 } 79 }
84
85 return viewport; 80 return viewport;
86} 81}
87 82
@@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
146 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); 141 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
147} 142}
148 143
149template <size_t N>
150std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
151 std::array<VkDeviceSize, N> expanded;
152 std::copy(strides.begin(), strides.end(), expanded.begin());
153 return expanded;
154}
155
156ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { 144ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
157 if (entry.is_buffer) { 145 if (entry.is_buffer) {
158 return ImageViewType::e2D; 146 return ImageViewType::e2D;
@@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca
221 } 209 }
222} 210}
223 211
224} // Anonymous namespace 212DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
225 213 bool is_indexed) {
226class BufferBindings final { 214 DrawParams params{
227public: 215 .base_instance = regs.vb_base_instance,
228 void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) { 216 .num_instances = is_instanced ? num_instances : 1,
229 vertex.buffers[vertex.num_buffers] = buffer; 217 .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
230 vertex.offsets[vertex.num_buffers] = offset; 218 .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
231 vertex.sizes[vertex.num_buffers] = size; 219 .is_indexed = is_indexed,
232 vertex.strides[vertex.num_buffers] = static_cast<u16>(stride); 220 };
233 ++vertex.num_buffers; 221 if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
234 } 222 // 6 triangle vertices per quad, base vertex is part of the index
235 223 // See BindQuadArrayIndexBuffer for more details
236 void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) { 224 params.num_vertices = (params.num_vertices / 4) * 6;
237 index.buffer = buffer; 225 params.base_vertex = 0;
238 index.offset = offset; 226 params.is_indexed = true;
239 index.type = type;
240 }
241
242 void Bind(const Device& device, VKScheduler& scheduler) const {
243 // Use this large switch case to avoid dispatching more memory in the record lambda than
244 // what we need. It looks horrible, but it's the best we can do on standard C++.
245 switch (vertex.num_buffers) {
246 case 0:
247 return BindStatic<0>(device, scheduler);
248 case 1:
249 return BindStatic<1>(device, scheduler);
250 case 2:
251 return BindStatic<2>(device, scheduler);
252 case 3:
253 return BindStatic<3>(device, scheduler);
254 case 4:
255 return BindStatic<4>(device, scheduler);
256 case 5:
257 return BindStatic<5>(device, scheduler);
258 case 6:
259 return BindStatic<6>(device, scheduler);
260 case 7:
261 return BindStatic<7>(device, scheduler);
262 case 8:
263 return BindStatic<8>(device, scheduler);
264 case 9:
265 return BindStatic<9>(device, scheduler);
266 case 10:
267 return BindStatic<10>(device, scheduler);
268 case 11:
269 return BindStatic<11>(device, scheduler);
270 case 12:
271 return BindStatic<12>(device, scheduler);
272 case 13:
273 return BindStatic<13>(device, scheduler);
274 case 14:
275 return BindStatic<14>(device, scheduler);
276 case 15:
277 return BindStatic<15>(device, scheduler);
278 case 16:
279 return BindStatic<16>(device, scheduler);
280 case 17:
281 return BindStatic<17>(device, scheduler);
282 case 18:
283 return BindStatic<18>(device, scheduler);
284 case 19:
285 return BindStatic<19>(device, scheduler);
286 case 20:
287 return BindStatic<20>(device, scheduler);
288 case 21:
289 return BindStatic<21>(device, scheduler);
290 case 22:
291 return BindStatic<22>(device, scheduler);
292 case 23:
293 return BindStatic<23>(device, scheduler);
294 case 24:
295 return BindStatic<24>(device, scheduler);
296 case 25:
297 return BindStatic<25>(device, scheduler);
298 case 26:
299 return BindStatic<26>(device, scheduler);
300 case 27:
301 return BindStatic<27>(device, scheduler);
302 case 28:
303 return BindStatic<28>(device, scheduler);
304 case 29:
305 return BindStatic<29>(device, scheduler);
306 case 30:
307 return BindStatic<30>(device, scheduler);
308 case 31:
309 return BindStatic<31>(device, scheduler);
310 case 32:
311 return BindStatic<32>(device, scheduler);
312 }
313 UNREACHABLE();
314 }
315
316private:
317 // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
318 struct {
319 size_t num_buffers = 0;
320 std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
321 std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
322 std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
323 std::array<u16, Maxwell::NumVertexArrays> strides;
324 } vertex;
325
326 struct {
327 VkBuffer buffer = nullptr;
328 VkDeviceSize offset;
329 VkIndexType type;
330 } index;
331
332 template <size_t N>
333 void BindStatic(const Device& device, VKScheduler& scheduler) const {
334 if (device.IsExtExtendedDynamicStateSupported()) {
335 if (index.buffer) {
336 BindStatic<N, true, true>(scheduler);
337 } else {
338 BindStatic<N, false, true>(scheduler);
339 }
340 } else {
341 if (index.buffer) {
342 BindStatic<N, true, false>(scheduler);
343 } else {
344 BindStatic<N, false, false>(scheduler);
345 }
346 }
347 }
348
349 template <size_t N, bool is_indexed, bool has_extended_dynamic_state>
350 void BindStatic(VKScheduler& scheduler) const {
351 static_assert(N <= Maxwell::NumVertexArrays);
352 if constexpr (N == 0) {
353 return;
354 }
355
356 std::array<VkBuffer, N> buffers;
357 std::array<VkDeviceSize, N> offsets;
358 std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
359 std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
360
361 if constexpr (has_extended_dynamic_state) {
362 // With extended dynamic states we can specify the length and stride of a vertex buffer
363 std::array<VkDeviceSize, N> sizes;
364 std::array<u16, N> strides;
365 std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin());
366 std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin());
367
368 if constexpr (is_indexed) {
369 scheduler.Record(
370 [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) {
371 cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
372 cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
373 offsets.data(), sizes.data(),
374 ExpandStrides(strides).data());
375 });
376 } else {
377 scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) {
378 cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
379 offsets.data(), sizes.data(),
380 ExpandStrides(strides).data());
381 });
382 }
383 return;
384 }
385
386 if constexpr (is_indexed) {
387 // Indexed draw
388 scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
389 cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
390 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
391 });
392 } else {
393 // Array draw
394 scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) {
395 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
396 });
397 }
398 }
399};
400
401void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
402 if (is_indexed) {
403 cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance);
404 } else {
405 cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance);
406 } 227 }
228 return params;
407} 229}
230} // Anonymous namespace
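MakeDrawParams above replaces the old SetupGeometry/DrawParameters pair with a plain aggregate. For the quad topology the vertex count is rewritten into an index count, two triangles per quad, with base_vertex folded into the index data as the comment states. A quick check of the arithmetic:

    #include <cstdint>

    constexpr uint32_t QuadVerticesToIndices(uint32_t num_vertices) {
        return (num_vertices / 4) * 6;
    }
    static_assert(QuadVerticesToIndices(4) == 6);    // 1 quad  -> 2 triangles
    static_assert(QuadVerticesToIndices(16) == 24);  // 4 quads -> 8 triangles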
408 231
409RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 232RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
410 Tegra::MemoryManager& gpu_memory_, 233 Tegra::MemoryManager& gpu_memory_,
@@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
414 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, 237 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
415 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, 238 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
416 screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, 239 screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
417 state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), 240 state_tracker{state_tracker_}, scheduler{scheduler_},
418 staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), 241 staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
419 update_descriptor_queue(device, scheduler), 242 update_descriptor_queue(device, scheduler),
420 blit_image(device, scheduler, state_tracker, descriptor_pool), 243 blit_image(device, scheduler, state_tracker, descriptor_pool),
421 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
422 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
423 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
424 texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, 244 texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image},
425 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 245 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
246 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
247 update_descriptor_queue, descriptor_pool),
248 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
426 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 249 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
427 descriptor_pool, update_descriptor_queue), 250 descriptor_pool, update_descriptor_queue),
428 buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler,
429 stream_buffer, staging_pool),
430 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, 251 query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
431 fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler), 252 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
432 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { 253 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
433 scheduler.SetQueryCache(query_cache); 254 scheduler.SetQueryCache(query_cache);
434 if (device.UseAsynchronousShaders()) { 255 if (device.UseAsynchronousShaders()) {
@@ -449,22 +270,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
449 GraphicsPipelineCacheKey key; 270 GraphicsPipelineCacheKey key;
450 key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); 271 key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
451 272
452 buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); 273 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
453
454 BufferBindings buffer_bindings;
455 const DrawParameters draw_params =
456 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
457 274
458 auto lock = texture_cache.AcquireLock();
459 texture_cache.SynchronizeGraphicsDescriptors(); 275 texture_cache.SynchronizeGraphicsDescriptors();
460
461 texture_cache.UpdateRenderTargets(false); 276 texture_cache.UpdateRenderTargets(false);
462 277
463 const auto shaders = pipeline_cache.GetShaders(); 278 const auto shaders = pipeline_cache.GetShaders();
464 key.shaders = GetShaderAddresses(shaders); 279 key.shaders = GetShaderAddresses(shaders);
465 SetupShaderDescriptors(shaders); 280 SetupShaderDescriptors(shaders, is_indexed);
466
467 buffer_cache.Unmap();
468 281
469 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); 282 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
470 key.renderpass = framebuffer->RenderPass(); 283 key.renderpass = framebuffer->RenderPass();
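Draw() above swaps texture_cache.AcquireLock() for a std::scoped_lock over buffer_cache.mutex and texture_cache.mutex, which locks both caches together without risking deadlock when other paths name the mutexes in a different order. A minimal, self-contained sketch of the idiom; the stub types stand in for the real caches:

    #include <mutex>

    struct BufferCacheStub { std::mutex mutex; };
    struct TextureCacheStub { std::mutex mutex; };

    void DrawLike(BufferCacheStub& buffer_cache, TextureCacheStub& texture_cache) {
        // std::scoped_lock applies a deadlock-avoidance algorithm when given several
        // mutexes, so callers may list them in any order.
        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        // ... touch both caches while both mutexes are held ...
    }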
@@ -476,22 +289,29 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
476 return; 289 return;
477 } 290 }
478 291
479 buffer_bindings.Bind(device, scheduler);
480
481 BeginTransformFeedback(); 292 BeginTransformFeedback();
482 293
483 scheduler.RequestRenderpass(framebuffer); 294 scheduler.RequestRenderpass(framebuffer);
484 scheduler.BindGraphicsPipeline(pipeline->GetHandle()); 295 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
485 UpdateDynamicStates(); 296 UpdateDynamicStates();
486 297
487 const auto pipeline_layout = pipeline->GetLayout(); 298 const auto& regs = maxwell3d.regs;
488 const auto descriptor_set = pipeline->CommitDescriptorSet(); 299 const u32 num_instances = maxwell3d.mme_draw.instance_count;
300 const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
301 const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
302 const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
489 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { 303 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
490 if (descriptor_set) { 304 if (descriptor_set) {
491 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 305 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
492 DESCRIPTOR_SET, descriptor_set, {}); 306 DESCRIPTOR_SET, descriptor_set, nullptr);
307 }
308 if (draw_params.is_indexed) {
309 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
310 draw_params.base_vertex, draw_params.base_instance);
311 } else {
312 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
313 draw_params.base_vertex, draw_params.base_instance);
493 } 314 }
494 draw_params.Draw(cmdbuf);
495 }); 315 });
496 316
497 EndTransformFeedback(); 317 EndTransformFeedback();
@@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() {
515 return; 335 return;
516 } 336 }
517 337
518 auto lock = texture_cache.AcquireLock(); 338 std::scoped_lock lock{texture_cache.mutex};
519 texture_cache.UpdateRenderTargets(true); 339 texture_cache.UpdateRenderTargets(true);
520 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); 340 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
521 const VkExtent2D render_area = framebuffer->RenderArea(); 341 const VkExtent2D render_area = framebuffer->RenderArea();
@@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() {
559 if (use_stencil) { 379 if (use_stencil) {
560 aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; 380 aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
561 } 381 }
562
563 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, 382 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
564 clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { 383 clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
565 VkClearAttachment attachment; 384 VkClearAttachment attachment;
@@ -580,12 +399,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
580 auto& pipeline = pipeline_cache.GetComputePipeline({ 399 auto& pipeline = pipeline_cache.GetComputePipeline({
581 .shader = code_addr, 400 .shader = code_addr,
582 .shared_memory_size = launch_desc.shared_alloc, 401 .shared_memory_size = launch_desc.shared_alloc,
583 .workgroup_size = 402 .workgroup_size{
584 { 403 launch_desc.block_dim_x,
585 launch_desc.block_dim_x, 404 launch_desc.block_dim_y,
586 launch_desc.block_dim_y, 405 launch_desc.block_dim_z,
587 launch_desc.block_dim_z, 406 },
588 },
589 }); 407 });
590 408
591 // Compute dispatches can't be executed inside a renderpass 409 // Compute dispatches can't be executed inside a renderpass
@@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
594 image_view_indices.clear(); 412 image_view_indices.clear();
595 sampler_handles.clear(); 413 sampler_handles.clear();
596 414
597 auto lock = texture_cache.AcquireLock(); 415 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
598 texture_cache.SynchronizeComputeDescriptors();
599 416
600 const auto& entries = pipeline.GetEntries(); 417 const auto& entries = pipeline.GetEntries();
418 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
419 buffer_cache.UnbindComputeStorageBuffers();
420 u32 ssbo_index = 0;
421 for (const auto& buffer : entries.global_buffers) {
422 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
423 buffer.is_written);
424 ++ssbo_index;
425 }
426 buffer_cache.UpdateComputeBuffers();
427
428 texture_cache.SynchronizeComputeDescriptors();
429
601 SetupComputeUniformTexels(entries); 430 SetupComputeUniformTexels(entries);
602 SetupComputeTextures(entries); 431 SetupComputeTextures(entries);
603 SetupComputeStorageTexels(entries); 432 SetupComputeStorageTexels(entries);
@@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
606 const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 435 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
607 texture_cache.FillComputeImageViews(indices_span, image_view_ids); 436 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
608 437
609 buffer_cache.Map(CalculateComputeStreamBufferSize());
610
611 update_descriptor_queue.Acquire(); 438 update_descriptor_queue.Acquire();
612 439
613 SetupComputeConstBuffers(entries); 440 buffer_cache.BindHostComputeBuffers();
614 SetupComputeGlobalBuffers(entries);
615 441
616 ImageViewId* image_view_id_ptr = image_view_ids.data(); 442 ImageViewId* image_view_id_ptr = image_view_ids.data();
617 VkSampler* sampler_ptr = sampler_handles.data(); 443 VkSampler* sampler_ptr = sampler_handles.data();
618 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, 444 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
619 sampler_ptr); 445 sampler_ptr);
620 446
621 buffer_cache.Unmap();
622
623 const VkPipeline pipeline_handle = pipeline.GetHandle(); 447 const VkPipeline pipeline_handle = pipeline.GetHandle();
624 const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); 448 const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
625 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); 449 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
@@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
644 query_cache.Query(gpu_addr, type, timestamp); 468 query_cache.Query(gpu_addr, type, timestamp);
645} 469}
646 470
471void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
472 u32 size) {
473 buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
474}
475
647void RasterizerVulkan::FlushAll() {} 476void RasterizerVulkan::FlushAll() {}
648 477
649void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { 478void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
@@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
651 return; 480 return;
652 } 481 }
653 { 482 {
654 auto lock = texture_cache.AcquireLock(); 483 std::scoped_lock lock{texture_cache.mutex};
655 texture_cache.DownloadMemory(addr, size); 484 texture_cache.DownloadMemory(addr, size);
656 } 485 }
657 buffer_cache.FlushRegion(addr, size); 486 {
487 std::scoped_lock lock{buffer_cache.mutex};
488 buffer_cache.DownloadMemory(addr, size);
489 }
658 query_cache.FlushRegion(addr, size); 490 query_cache.FlushRegion(addr, size);
659} 491}
660 492
661bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { 493bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
494 std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
662 if (!Settings::IsGPULevelHigh()) { 495 if (!Settings::IsGPULevelHigh()) {
663 return buffer_cache.MustFlushRegion(addr, size); 496 return buffer_cache.IsRegionGpuModified(addr, size);
664 } 497 }
665 return texture_cache.IsRegionGpuModified(addr, size) || 498 return texture_cache.IsRegionGpuModified(addr, size) ||
666 buffer_cache.MustFlushRegion(addr, size); 499 buffer_cache.IsRegionGpuModified(addr, size);
667} 500}
668 501
669void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { 502void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
@@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
671 return; 504 return;
672 } 505 }
673 { 506 {
674 auto lock = texture_cache.AcquireLock(); 507 std::scoped_lock lock{texture_cache.mutex};
675 texture_cache.WriteMemory(addr, size); 508 texture_cache.WriteMemory(addr, size);
676 } 509 }
510 {
511 std::scoped_lock lock{buffer_cache.mutex};
512 buffer_cache.WriteMemory(addr, size);
513 }
677 pipeline_cache.InvalidateRegion(addr, size); 514 pipeline_cache.InvalidateRegion(addr, size);
678 buffer_cache.InvalidateRegion(addr, size);
679 query_cache.InvalidateRegion(addr, size); 515 query_cache.InvalidateRegion(addr, size);
680} 516}
681 517
@@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
683 if (addr == 0 || size == 0) { 519 if (addr == 0 || size == 0) {
684 return; 520 return;
685 } 521 }
522 pipeline_cache.OnCPUWrite(addr, size);
686 { 523 {
687 auto lock = texture_cache.AcquireLock(); 524 std::scoped_lock lock{texture_cache.mutex};
688 texture_cache.WriteMemory(addr, size); 525 texture_cache.WriteMemory(addr, size);
689 } 526 }
690 pipeline_cache.OnCPUWrite(addr, size); 527 {
691 buffer_cache.OnCPUWrite(addr, size); 528 std::scoped_lock lock{buffer_cache.mutex};
529 buffer_cache.CachedWriteMemory(addr, size);
530 }
692} 531}
693 532
694void RasterizerVulkan::SyncGuestHost() { 533void RasterizerVulkan::SyncGuestHost() {
695 buffer_cache.SyncGuestHost();
696 pipeline_cache.SyncGuestHost(); 534 pipeline_cache.SyncGuestHost();
535 {
536 std::scoped_lock lock{buffer_cache.mutex};
537 buffer_cache.FlushCachedWrites();
538 }
697} 539}
698 540
699void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { 541void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
700 { 542 {
701 auto lock = texture_cache.AcquireLock(); 543 std::scoped_lock lock{texture_cache.mutex};
702 texture_cache.UnmapMemory(addr, size); 544 texture_cache.UnmapMemory(addr, size);
703 } 545 }
704 buffer_cache.OnCPUWrite(addr, size); 546 {
547 std::scoped_lock lock{buffer_cache.mutex};
548 buffer_cache.WriteMemory(addr, size);
549 }
705 pipeline_cache.OnCPUWrite(addr, size); 550 pipeline_cache.OnCPUWrite(addr, size);
706} 551}
707 552
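OnCPUWrite above now records the range with buffer_cache.CachedWriteMemory, and SyncGuestHost later calls buffer_cache.FlushCachedWrites, so guest writes are batched instead of being applied immediately. A conceptual sketch of that deferral, purely illustrative; the real cache tracks modified pages rather than raw ranges:

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Conceptual sketch only: queue CPU-write notifications, then apply them in one batch.
    class CachedWrites {
    public:
        void CachedWriteMemory(uint64_t addr, uint64_t size) {
            pending.emplace_back(addr, size);
        }
        template <typename F>
        void FlushCachedWrites(F&& apply) {
            for (const auto& [addr, size] : pending) {
                apply(addr, size);
            }
            pending.clear();
        }
    private:
        std::vector<std::pair<uint64_t, uint64_t>> pending;
    };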
@@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() {
774 draw_counter = 0; 619 draw_counter = 0;
775 update_descriptor_queue.TickFrame(); 620 update_descriptor_queue.TickFrame();
776 fence_manager.TickFrame(); 621 fence_manager.TickFrame();
777 buffer_cache.TickFrame();
778 staging_pool.TickFrame(); 622 staging_pool.TickFrame();
779 { 623 {
780 auto lock = texture_cache.AcquireLock(); 624 std::scoped_lock lock{texture_cache.mutex};
781 texture_cache.TickFrame(); 625 texture_cache.TickFrame();
782 } 626 }
627 {
628 std::scoped_lock lock{buffer_cache.mutex};
629 buffer_cache.TickFrame();
630 }
783} 631}
784 632
785bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, 633bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
786 const Tegra::Engines::Fermi2D::Surface& dst, 634 const Tegra::Engines::Fermi2D::Surface& dst,
787 const Tegra::Engines::Fermi2D::Config& copy_config) { 635 const Tegra::Engines::Fermi2D::Config& copy_config) {
788 auto lock = texture_cache.AcquireLock(); 636 std::scoped_lock lock{texture_cache.mutex};
789 texture_cache.BlitImage(dst, src, copy_config); 637 texture_cache.BlitImage(dst, src, copy_config);
790 return true; 638 return true;
791} 639}
@@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
795 if (!framebuffer_addr) { 643 if (!framebuffer_addr) {
796 return false; 644 return false;
797 } 645 }
798 646 std::scoped_lock lock{texture_cache.mutex};
799 auto lock = texture_cache.AcquireLock();
800 ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); 647 ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
801 if (!image_view) { 648 if (!image_view) {
802 return false; 649 return false;
803 } 650 }
804
805 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); 651 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
806 screen_info.width = image_view->size.width; 652 screen_info.width = image_view->size.width;
807 screen_info.height = image_view->size.height; 653 screen_info.height = image_view->size.height;
@@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() {
830 draw_counter = 0; 676 draw_counter = 0;
831} 677}
832 678
833RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
834 BufferBindings& buffer_bindings,
835 bool is_indexed,
836 bool is_instanced) {
837 MICROPROFILE_SCOPE(Vulkan_Geometry);
838
839 const auto& regs = maxwell3d.regs;
840
841 SetupVertexArrays(buffer_bindings);
842
843 const u32 base_instance = regs.vb_base_instance;
844 const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
845 const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
846 const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
847
848 DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
849 SetupIndexBuffer(buffer_bindings, params, is_indexed);
850
851 return params;
852}
853
854void RasterizerVulkan::SetupShaderDescriptors( 679void RasterizerVulkan::SetupShaderDescriptors(
855 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { 680 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
856 image_view_indices.clear(); 681 image_view_indices.clear();
857 sampler_handles.clear(); 682 sampler_handles.clear();
858 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 683 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
@@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors(
860 if (!shader) { 685 if (!shader) {
861 continue; 686 continue;
862 } 687 }
863 const auto& entries = shader->GetEntries(); 688 const ShaderEntries& entries = shader->GetEntries();
864 SetupGraphicsUniformTexels(entries, stage); 689 SetupGraphicsUniformTexels(entries, stage);
865 SetupGraphicsTextures(entries, stage); 690 SetupGraphicsTextures(entries, stage);
866 SetupGraphicsStorageTexels(entries, stage); 691 SetupGraphicsStorageTexels(entries, stage);
867 SetupGraphicsImages(entries, stage); 692 SetupGraphicsImages(entries, stage);
693
694 buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
695 buffer_cache.UnbindGraphicsStorageBuffers(stage);
696 u32 ssbo_index = 0;
697 for (const auto& buffer : entries.global_buffers) {
698 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
699 buffer.cbuf_offset, buffer.is_written);
700 ++ssbo_index;
701 }
868 } 702 }
869 const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 703 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
704 buffer_cache.UpdateGraphicsBuffers(is_indexed);
870 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); 705 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
871 706
707 buffer_cache.BindHostGeometryBuffers(is_indexed);
708
872 update_descriptor_queue.Acquire(); 709 update_descriptor_queue.Acquire();
873 710
874 ImageViewId* image_view_id_ptr = image_view_ids.data(); 711 ImageViewId* image_view_id_ptr = image_view_ids.data();
@@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
879 if (!shader) { 716 if (!shader) {
880 continue; 717 continue;
881 } 718 }
882 const auto& entries = shader->GetEntries(); 719 buffer_cache.BindHostStageBuffers(stage);
883 SetupGraphicsConstBuffers(entries, stage); 720 PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
884 SetupGraphicsGlobalBuffers(entries, stage); 721 image_view_id_ptr, sampler_ptr);
885 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
886 sampler_ptr);
887 } 722 }
888} 723}
889 724
@@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() {
916 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); 751 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
917 return; 752 return;
918 } 753 }
919
920 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || 754 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
921 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 755 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
922 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); 756 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
923 757 scheduler.Record(
924 UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); 758 [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
925 UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
926 UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
927
928 const auto& binding = regs.tfb_bindings[0];
929 UNIMPLEMENTED_IF(binding.buffer_enable == 0);
930 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
931
932 const GPUVAddr gpu_addr = binding.Address();
933 const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
934 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
935
936 scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
937 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
938 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
939 });
940} 759}
941 760
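BeginTransformFeedback above no longer uploads or binds the feedback buffer itself; with the binding presumably handled by the new buffer cache, the rasterizer only records vkCmdBeginTransformFeedbackEXT. A sketch of the command ordering that still has to hold, with placeholder arguments and the null counter buffers kept from the diff:

    #include <vulkan/vulkan.h>

    // Illustrative ordering only; requires VK_EXT_transform_feedback.
    void RecordTransformFeedback(VkCommandBuffer cmdbuf, VkBuffer buffer, VkDeviceSize offset,
                                 VkDeviceSize size) {
        // Assumed to be recorded by the buffer cache now, before the draw:
        vkCmdBindTransformFeedbackBuffersEXT(cmdbuf, 0, 1, &buffer, &offset, &size);
        // Still recorded by the rasterizer (see the lambda above):
        vkCmdBeginTransformFeedbackEXT(cmdbuf, 0, 0, nullptr, nullptr);
    }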
942void RasterizerVulkan::EndTransformFeedback() { 761void RasterizerVulkan::EndTransformFeedback() {
@@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() {
947 if (!device.IsExtTransformFeedbackSupported()) { 766 if (!device.IsExtTransformFeedbackSupported()) {
948 return; 767 return;
949 } 768 }
950
951 scheduler.Record( 769 scheduler.Record(
952 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); 770 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
953} 771}
954 772
955void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
956 const auto& regs = maxwell3d.regs;
957
958 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
959 const auto& vertex_array = regs.vertex_array[index];
960 if (!vertex_array.IsEnabled()) {
961 continue;
962 }
963 const GPUVAddr start{vertex_array.StartAddress()};
964 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
965
966 ASSERT(end >= start);
967 const size_t size = end - start;
968 if (size == 0) {
969 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
970 continue;
971 }
972 const auto info = buffer_cache.UploadMemory(start, size);
973 buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);
974 }
975}
976
977void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
978 bool is_indexed) {
979 if (params.num_vertices == 0) {
980 return;
981 }
982 const auto& regs = maxwell3d.regs;
983 switch (regs.draw.topology) {
984 case Maxwell::PrimitiveTopology::Quads: {
985 if (!params.is_indexed) {
986 const auto [buffer, offset] =
987 quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
988 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
989 params.base_vertex = 0;
990 params.num_vertices = params.num_vertices * 6 / 4;
991 params.is_indexed = true;
992 break;
993 }
994 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
995 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
996 VkBuffer buffer = info.handle;
997 u64 offset = info.offset;
998 std::tie(buffer, offset) = quad_indexed_pass.Assemble(
999 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
1000
1001 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
1002 params.num_vertices = (params.num_vertices / 4) * 6;
1003 params.base_vertex = 0;
1004 break;
1005 }
1006 default: {
1007 if (!is_indexed) {
1008 break;
1009 }
1010 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
1011 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
1012 VkBuffer buffer = info.handle;
1013 u64 offset = info.offset;
1014
1015 auto format = regs.index_array.format;
1016 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
1017 if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
1018 std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
1019 format = Maxwell::IndexFormat::UnsignedShort;
1020 }
1021
1022 buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
1023 break;
1024 }
1025 }
1026}
1027
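The removed SetupIndexBuffer above also held the fallback for 8-bit indices: when VK_EXT_index_type_uint8 is unsupported, Uint8Pass widens every index to 16 bits on the GPU, and that decision presumably moves into the new buffer cache along with the rest of the index binding. A CPU-side sketch of the same transform, for illustration only:

    #include <cstdint>
    #include <vector>

    // Sketch: widen 8-bit indices to 16 bits, the transform Uint8Pass runs in a
    // compute shader when native uint8 index buffers are unavailable.
    std::vector<uint16_t> WidenIndices(const std::vector<uint8_t>& indices) {
        return std::vector<uint16_t>(indices.begin(), indices.end());
    }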
1028void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {
1029 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
1030 const auto& shader_stage = maxwell3d.state.shader_stages[stage];
1031 for (const auto& entry : entries.const_buffers) {
1032 SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
1033 }
1034}
1035
1036void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {
1037 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
1038 const auto& cbufs{maxwell3d.state.shader_stages[stage]};
1039
1040 for (const auto& entry : entries.global_buffers) {
1041 const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
1042 SetupGlobalBuffer(entry, addr);
1043 }
1044}
1045
1046void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { 773void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
1047 MICROPROFILE_SCOPE(Vulkan_Textures);
1048 const auto& regs = maxwell3d.regs; 774 const auto& regs = maxwell3d.regs;
1049 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 775 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1050 for (const auto& entry : entries.uniform_texels) { 776 for (const auto& entry : entries.uniform_texels) {
@@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries,
1054} 780}
1055 781
1056void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { 782void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
1057 MICROPROFILE_SCOPE(Vulkan_Textures);
1058 const auto& regs = maxwell3d.regs; 783 const auto& regs = maxwell3d.regs;
1059 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 784 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1060 for (const auto& entry : entries.samplers) { 785 for (const auto& entry : entries.samplers) {
@@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_
1070} 795}
1071 796
1072void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { 797void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
1073 MICROPROFILE_SCOPE(Vulkan_Textures);
1074 const auto& regs = maxwell3d.regs; 798 const auto& regs = maxwell3d.regs;
1075 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 799 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1076 for (const auto& entry : entries.storage_texels) { 800 for (const auto& entry : entries.storage_texels) {
@@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries,
1080} 804}
1081 805
1082void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { 806void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
1083 MICROPROFILE_SCOPE(Vulkan_Images);
1084 const auto& regs = maxwell3d.regs; 807 const auto& regs = maxwell3d.regs;
1085 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 808 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1086 for (const auto& entry : entries.images) { 809 for (const auto& entry : entries.images) {
@@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t
1089 } 812 }
1090} 813}
1091 814
1092void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
1093 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
1094 const auto& launch_desc = kepler_compute.launch_description;
1095 for (const auto& entry : entries.const_buffers) {
1096 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
1097 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
1098 const Tegra::Engines::ConstBufferInfo info{
1099 .address = config.Address(),
1100 .size = config.size,
1101 .enabled = mask[entry.GetIndex()],
1102 };
1103 SetupConstBuffer(entry, info);
1104 }
1105}
1106
1107void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
1108 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
1109 const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
1110 for (const auto& entry : entries.global_buffers) {
1111 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
1112 SetupGlobalBuffer(entry, addr);
1113 }
1114}
1115
1116void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { 815void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1117 MICROPROFILE_SCOPE(Vulkan_Textures);
1118 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 816 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1119 for (const auto& entry : entries.uniform_texels) { 817 for (const auto& entry : entries.uniform_texels) {
1120 const TextureHandle handle = 818 const TextureHandle handle =
@@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1124} 822}
1125 823
1126void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { 824void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1127 MICROPROFILE_SCOPE(Vulkan_Textures);
1128 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 825 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1129 for (const auto& entry : entries.samplers) { 826 for (const auto& entry : entries.samplers) {
1130 for (size_t index = 0; index < entry.size; ++index) { 827 for (size_t index = 0; index < entry.size; ++index) {
@@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1139} 836}
1140 837
1141void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { 838void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1142 MICROPROFILE_SCOPE(Vulkan_Textures);
1143 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 839 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1144 for (const auto& entry : entries.storage_texels) { 840 for (const auto& entry : entries.storage_texels) {
1145 const TextureHandle handle = 841 const TextureHandle handle =
@@ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1149} 845}
1150 846
1151void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { 847void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1152 MICROPROFILE_SCOPE(Vulkan_Images);
1153 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 848 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1154 for (const auto& entry : entries.images) { 849 for (const auto& entry : entries.images) {
1155 const TextureHandle handle = 850 const TextureHandle handle =
@@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1158 } 853 }
1159} 854}
1160 855
1161void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
1162 const Tegra::Engines::ConstBufferInfo& buffer) {
1163 if (!buffer.enabled) {
1164 // Set values to zero to unbind buffers
1165 update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
1166 return;
1167 }
1168 // Align the size to avoid bad std140 interactions
1169 const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1170 ASSERT(size <= MaxConstbufferSize);
1171
1172 const u64 alignment = device.GetUniformBufferAlignment();
1173 const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);
1174 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1175}
1176
1177void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
1178 const u64 actual_addr = gpu_memory.Read<u64>(address);
1179 const u32 size = gpu_memory.Read<u32>(address + 8);
1180
1181 if (size == 0) {
1182 // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
1183 // because Vulkan doesn't like empty buffers.
1184 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
1185 // default buffer.
1186 static constexpr size_t dummy_size = 4;
1187 const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
1188 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
1189 return;
1190 }
1191
1192 const auto info = buffer_cache.UploadMemory(
1193 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
1194 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1195}
1196
1197void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 856void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
1198 if (!state_tracker.TouchViewports()) { 857 if (!state_tracker.TouchViewports()) {
1199 return; 858 return;
@@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
1206 GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), 865 GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
1207 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), 866 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
1208 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), 867 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
1209 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; 868 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
869 };
1210 scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); 870 scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
1211} 871}
1212 872
@@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
1214 if (!state_tracker.TouchScissors()) { 874 if (!state_tracker.TouchScissors()) {
1215 return; 875 return;
1216 } 876 }
1217 const std::array scissors = { 877 const std::array scissors{
1218 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), 878 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
1219 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), 879 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
1220 GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), 880 GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
1221 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), 881 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
1222 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), 882 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
1223 GetScissorState(regs, 15)}; 883 GetScissorState(regs, 15),
884 };
1224 scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); 885 scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
1225} 886}
1226 887
@@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
1385 }); 1046 });
1386} 1047}
1387 1048
1388size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
1389 size_t size = CalculateVertexArraysSize();
1390 if (is_indexed) {
1391 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
1392 }
1393 size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
1394 return size;
1395}
1396
1397size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
1398 return Tegra::Engines::KeplerCompute::NumConstBuffers *
1399 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
1400}
1401
1402size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1403 const auto& regs = maxwell3d.regs;
1404
1405 size_t size = 0;
1406 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
1407 // This implementation assumes that all attributes are used in the shader.
1408 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
1409 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1410 DEBUG_ASSERT(end >= start);
1411
1412 size += (end - start) * regs.vertex_array[index].enable;
1413 }
1414 return size;
1415}
1416
1417size_t RasterizerVulkan::CalculateIndexBufferSize() const {
1418 return static_cast<size_t>(maxwell3d.regs.index_array.count) *
1419 static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
1420}
1421
1422size_t RasterizerVulkan::CalculateConstBufferSize(
1423 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
1424 if (entry.IsIndirect()) {
1425 // Buffer is accessed indirectly, so upload the entire thing
1426 return buffer.size;
1427 } else {
1428 // Buffer is accessed directly, upload just what we use
1429 return entry.GetSize();
1430 }
1431}
1432
1433VkBuffer RasterizerVulkan::DefaultBuffer() {
1434 if (default_buffer) {
1435 return *default_buffer;
1436 }
1437 default_buffer = device.GetLogical().CreateBuffer({
1438 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1439 .pNext = nullptr,
1440 .flags = 0,
1441 .size = DEFAULT_BUFFER_SIZE,
1442 .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
1443 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
1444 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1445 .queueFamilyIndexCount = 0,
1446 .pQueueFamilyIndices = nullptr,
1447 });
1448 default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal);
1449
1450 scheduler.RequestOutsideRenderPassOperationContext();
1451 scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
1452 cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0);
1453 });
1454 return *default_buffer;
1455}
1456
1457} // namespace Vulkan 1049} // namespace Vulkan
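Note on the rasterizer changes above: the hand-rolled SetupConstBuffer/SetupGlobalBuffer paths, the stream-buffer size heuristics, and the zero-filled DefaultBuffer() fallback are all removed. Constant buffers are instead reported to the shared buffer cache through the new BindGraphicsUniformBuffer override declared in vk_rasterizer.h below. A minimal sketch of such a forwarding override, assuming the buffer cache exposes a matching method (the implementation itself is not shown in this hunk):

    void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                     u32 size) {
        // Assumed BufferCache entry point: the cache now owns binding, streaming and
        // alignment of uniform buffers instead of the rasterizer.
        buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
    }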
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 8e261b9bd..7fc6741da 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -18,14 +18,12 @@
18#include "video_core/renderer_vulkan/blit_image.h" 18#include "video_core/renderer_vulkan/blit_image.h"
19#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 19#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
20#include "video_core/renderer_vulkan/vk_buffer_cache.h" 20#include "video_core/renderer_vulkan/vk_buffer_cache.h"
21#include "video_core/renderer_vulkan/vk_compute_pass.h"
22#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 21#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
23#include "video_core/renderer_vulkan/vk_fence_manager.h" 22#include "video_core/renderer_vulkan/vk_fence_manager.h"
24#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 23#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
25#include "video_core/renderer_vulkan/vk_query_cache.h" 24#include "video_core/renderer_vulkan/vk_query_cache.h"
26#include "video_core/renderer_vulkan/vk_scheduler.h" 25#include "video_core/renderer_vulkan/vk_scheduler.h"
27#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 26#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
28#include "video_core/renderer_vulkan/vk_stream_buffer.h"
29#include "video_core/renderer_vulkan/vk_texture_cache.h" 27#include "video_core/renderer_vulkan/vk_texture_cache.h"
30#include "video_core/renderer_vulkan/vk_update_descriptor.h" 28#include "video_core/renderer_vulkan/vk_update_descriptor.h"
31#include "video_core/shader/async_shaders.h" 29#include "video_core/shader/async_shaders.h"
@@ -49,7 +47,6 @@ namespace Vulkan {
49struct VKScreenInfo; 47struct VKScreenInfo;
50 48
51class StateTracker; 49class StateTracker;
52class BufferBindings;
53 50
54class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { 51class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
55public: 52public:
@@ -65,6 +62,7 @@ public:
65 void DispatchCompute(GPUVAddr code_addr) override; 62 void DispatchCompute(GPUVAddr code_addr) override;
66 void ResetCounter(VideoCore::QueryType type) override; 63 void ResetCounter(VideoCore::QueryType type) override;
67 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 64 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
65 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
68 void FlushAll() override; 66 void FlushAll() override;
69 void FlushRegion(VAddr addr, u64 size) override; 67 void FlushRegion(VAddr addr, u64 size) override;
70 bool MustFlushRegion(VAddr addr, u64 size) override; 68 bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -107,24 +105,11 @@ private:
107 105
108 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); 106 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
109 107
110 struct DrawParameters {
111 void Draw(vk::CommandBuffer cmdbuf) const;
112
113 u32 base_instance = 0;
114 u32 num_instances = 0;
115 u32 base_vertex = 0;
116 u32 num_vertices = 0;
117 bool is_indexed = 0;
118 };
119
120 void FlushWork(); 108 void FlushWork();
121 109
122 /// Setups geometry buffers and state.
123 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
124 bool is_indexed, bool is_instanced);
125
126 /// Setup descriptors in the graphics pipeline. 110 /// Setup descriptors in the graphics pipeline.
127 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); 111 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
112 bool is_indexed);
128 113
129 void UpdateDynamicStates(); 114 void UpdateDynamicStates();
130 115
@@ -132,16 +117,6 @@ private:
132 117
133 void EndTransformFeedback(); 118 void EndTransformFeedback();
134 119
135 void SetupVertexArrays(BufferBindings& buffer_bindings);
136
137 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
138
139 /// Setup constant buffers in the graphics pipeline.
140 void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
141
142 /// Setup global buffers in the graphics pipeline.
143 void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
144
145 /// Setup uniform texels in the graphics pipeline. 120 /// Setup uniform texels in the graphics pipeline.
146 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); 121 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
147 122
@@ -154,12 +129,6 @@ private:
154 /// Setup images in the graphics pipeline. 129 /// Setup images in the graphics pipeline.
155 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); 130 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
156 131
157 /// Setup constant buffers in the compute pipeline.
158 void SetupComputeConstBuffers(const ShaderEntries& entries);
159
160 /// Setup global buffers in the compute pipeline.
161 void SetupComputeGlobalBuffers(const ShaderEntries& entries);
162
163 /// Setup texel buffers in the compute pipeline. 132 /// Setup texel buffers in the compute pipeline.
164 void SetupComputeUniformTexels(const ShaderEntries& entries); 133 void SetupComputeUniformTexels(const ShaderEntries& entries);
165 134
@@ -172,11 +141,6 @@ private:
172 /// Setup images in the compute pipeline. 141 /// Setup images in the compute pipeline.
173 void SetupComputeImages(const ShaderEntries& entries); 142 void SetupComputeImages(const ShaderEntries& entries);
174 143
175 void SetupConstBuffer(const ConstBufferEntry& entry,
176 const Tegra::Engines::ConstBufferInfo& buffer);
177
178 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
179
180 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 144 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
181 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 145 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
182 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); 146 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -193,19 +157,6 @@ private:
193 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); 157 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
194 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 158 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
195 159
196 size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
197
198 size_t CalculateComputeStreamBufferSize() const;
199
200 size_t CalculateVertexArraysSize() const;
201
202 size_t CalculateIndexBufferSize() const;
203
204 size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
205 const Tegra::Engines::ConstBufferInfo& buffer) const;
206
207 VkBuffer DefaultBuffer();
208
209 Tegra::GPU& gpu; 160 Tegra::GPU& gpu;
210 Tegra::MemoryManager& gpu_memory; 161 Tegra::MemoryManager& gpu_memory;
211 Tegra::Engines::Maxwell3D& maxwell3d; 162 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -217,24 +168,19 @@ private:
217 StateTracker& state_tracker; 168 StateTracker& state_tracker;
218 VKScheduler& scheduler; 169 VKScheduler& scheduler;
219 170
220 VKStreamBuffer stream_buffer;
221 StagingBufferPool staging_pool; 171 StagingBufferPool staging_pool;
222 VKDescriptorPool descriptor_pool; 172 VKDescriptorPool descriptor_pool;
223 VKUpdateDescriptorQueue update_descriptor_queue; 173 VKUpdateDescriptorQueue update_descriptor_queue;
224 BlitImageHelper blit_image; 174 BlitImageHelper blit_image;
225 QuadArrayPass quad_array_pass;
226 QuadIndexedPass quad_indexed_pass;
227 Uint8Pass uint8_pass;
228 175
229 TextureCacheRuntime texture_cache_runtime; 176 TextureCacheRuntime texture_cache_runtime;
230 TextureCache texture_cache; 177 TextureCache texture_cache;
178 BufferCacheRuntime buffer_cache_runtime;
179 BufferCache buffer_cache;
231 VKPipelineCache pipeline_cache; 180 VKPipelineCache pipeline_cache;
232 VKBufferCache buffer_cache;
233 VKQueryCache query_cache; 181 VKQueryCache query_cache;
234 VKFenceManager fence_manager; 182 VKFenceManager fence_manager;
235 183
236 vk::Buffer default_buffer;
237 MemoryCommit default_buffer_commit;
238 vk::Event wfi_event; 184 vk::Event wfi_event;
239 VideoCommon::Shader::AsyncShaders async_shaders; 185 VideoCommon::Shader::AsyncShaders async_shaders;
240 186
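The new members buffer_cache_runtime and buffer_cache are declared as a pair, runtime first, mirroring the texture cache pair above them. Because C++ constructs members in declaration order, the runtime is fully built before the cache that holds a reference to it; a minimal sketch of the pattern with placeholder types:

    struct Runtime {};
    struct Cache {
        explicit Cache(Runtime& runtime_) : runtime{runtime_} {}
        Runtime& runtime;
    };
    struct Owner {
        Runtime runtime;       // constructed first (declaration order)
        Cache cache{runtime};  // safe: runtime is already initialized at this point
    };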
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 66004f9c0..f35c120b0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() {
52 worker_thread.join(); 52 worker_thread.join();
53} 53}
54 54
55u64 VKScheduler::CurrentTick() const noexcept {
56 return master_semaphore->CurrentTick();
57}
58
59bool VKScheduler::IsFree(u64 tick) const noexcept {
60 return master_semaphore->IsFree(tick);
61}
62
63void VKScheduler::Wait(u64 tick) {
64 master_semaphore->Wait(tick);
65}
66
67void VKScheduler::Flush(VkSemaphore semaphore) { 55void VKScheduler::Flush(VkSemaphore semaphore) {
68 SubmitExecution(semaphore); 56 SubmitExecution(semaphore);
69 AllocateNewContext(); 57 AllocateNewContext();
@@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() {
269 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | 257 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
270 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | 258 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
271 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 259 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
272 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, 260 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
273 vk::Span(barriers.data(), num_images)); 261 vk::Span(barriers.data(), num_images));
274 }); 262 });
275 state.renderpass = nullptr; 263 state.renderpass = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 15f2987eb..3ce48e9d2 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -14,6 +14,7 @@
14#include "common/alignment.h" 14#include "common/alignment.h"
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/threadsafe_queue.h" 16#include "common/threadsafe_queue.h"
17#include "video_core/renderer_vulkan/vk_master_semaphore.h"
17#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
18 19
19namespace Vulkan { 20namespace Vulkan {
@@ -21,7 +22,6 @@ namespace Vulkan {
21class CommandPool; 22class CommandPool;
22class Device; 23class Device;
23class Framebuffer; 24class Framebuffer;
24class MasterSemaphore;
25class StateTracker; 25class StateTracker;
26class VKQueryCache; 26class VKQueryCache;
27 27
@@ -32,15 +32,6 @@ public:
32 explicit VKScheduler(const Device& device, StateTracker& state_tracker); 32 explicit VKScheduler(const Device& device, StateTracker& state_tracker);
33 ~VKScheduler(); 33 ~VKScheduler();
34 34
35 /// Returns the current command buffer tick.
36 [[nodiscard]] u64 CurrentTick() const noexcept;
37
38 /// Returns true when a tick has been triggered by the GPU.
39 [[nodiscard]] bool IsFree(u64 tick) const noexcept;
40
41 /// Waits for the given tick to trigger on the GPU.
42 void Wait(u64 tick);
43
44 /// Sends the current execution context to the GPU. 35 /// Sends the current execution context to the GPU.
45 void Flush(VkSemaphore semaphore = nullptr); 36 void Flush(VkSemaphore semaphore = nullptr);
46 37
@@ -82,6 +73,21 @@ public:
82 (void)chunk->Record(command); 73 (void)chunk->Record(command);
83 } 74 }
84 75
76 /// Returns the current command buffer tick.
77 [[nodiscard]] u64 CurrentTick() const noexcept {
78 return master_semaphore->CurrentTick();
79 }
80
81 /// Returns true when a tick has been triggered by the GPU.
82 [[nodiscard]] bool IsFree(u64 tick) const noexcept {
83 return master_semaphore->IsFree(tick);
84 }
85
86 /// Waits for the given tick to trigger on the GPU.
87 void Wait(u64 tick) {
88 master_semaphore->Wait(tick);
89 }
90
85 /// Returns the master timeline semaphore. 91 /// Returns the master timeline semaphore.
86 [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { 92 [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
87 return *master_semaphore; 93 return *master_semaphore;
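CurrentTick(), IsFree() and Wait() move from vk_scheduler.cpp into the header as inline wrappers over the master semaphore, which is why vk_master_semaphore.h is now included directly. Callers such as the staging buffer pool stamp work with CurrentTick() and later poll IsFree() before reusing memory; a small sketch of that pattern using the API shown here:

    // Stamp a resource with the tick of the submission that last touched it and only
    // recycle it once the GPU has signalled past that tick.
    void RecycleWhenSafe(VKScheduler& scheduler, u64 last_use_tick) {
        if (!scheduler.IsFree(last_use_tick)) {
            scheduler.Wait(last_use_tick); // or hand out a fresh resource instead of stalling
        }
        // ... the resource can be reused from here on ...
    }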
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 61d52b961..40e2e0d38 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -3106,7 +3106,11 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3106 entries.const_buffers.emplace_back(cbuf.second, cbuf.first); 3106 entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
3107 } 3107 }
3108 for (const auto& [base, usage] : ir.GetGlobalMemory()) { 3108 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
3109 entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_written); 3109 entries.global_buffers.emplace_back(GlobalBufferEntry{
3110 .cbuf_index = base.cbuf_index,
3111 .cbuf_offset = base.cbuf_offset,
3112 .is_written = usage.is_written,
3113 });
3110 } 3114 }
3111 for (const auto& sampler : ir.GetSamplers()) { 3115 for (const auto& sampler : ir.GetSamplers()) {
3112 if (sampler.is_buffer) { 3116 if (sampler.is_buffer) {
@@ -3127,6 +3131,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3127 entries.attributes.insert(GetGenericAttributeLocation(attribute)); 3131 entries.attributes.insert(GetGenericAttributeLocation(attribute));
3128 } 3132 }
3129 } 3133 }
3134 for (const auto& buffer : entries.const_buffers) {
3135 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
3136 }
3130 entries.clip_distances = ir.GetClipDistances(); 3137 entries.clip_distances = ir.GetClipDistances();
3131 entries.shader_length = ir.GetLength(); 3138 entries.shader_length = ir.GetLength();
3132 entries.uses_warps = ir.UsesWarps(); 3139 entries.uses_warps = ir.UsesWarps();
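enabled_uniform_buffers packs one bit per constant-buffer slot used by the shader (bit N set means cbuf index N is referenced). A consumer can then walk only the set bits instead of iterating every possible slot; a self-contained sketch of that iteration (C++20 <bit>):

    #include <bit>
    #include <cstdint>

    // Calls fn(index) once for each set bit in the mask, lowest index first.
    template <typename Fn>
    void ForEachEnabledUniformBuffer(std::uint32_t mask, Fn&& fn) {
        while (mask != 0) {
            const int index = std::countr_zero(mask); // position of the lowest set bit
            fn(static_cast<std::uint32_t>(index));
            mask &= mask - 1;                         // clear that bit
        }
    }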
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 26381e444..5d94132a5 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -39,24 +39,7 @@ private:
39 u32 index{}; 39 u32 index{};
40}; 40};
41 41
42class GlobalBufferEntry { 42struct GlobalBufferEntry {
43public:
44 constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
45 : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
46
47 constexpr u32 GetCbufIndex() const {
48 return cbuf_index;
49 }
50
51 constexpr u32 GetCbufOffset() const {
52 return cbuf_offset;
53 }
54
55 constexpr bool IsWritten() const {
56 return is_written;
57 }
58
59private:
60 u32 cbuf_index{}; 43 u32 cbuf_index{};
61 u32 cbuf_offset{}; 44 u32 cbuf_offset{};
62 bool is_written{}; 45 bool is_written{};
@@ -78,6 +61,7 @@ struct ShaderEntries {
78 std::set<u32> attributes; 61 std::set<u32> attributes;
79 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 62 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
80 std::size_t shader_length{}; 63 std::size_t shader_length{};
64 u32 enabled_uniform_buffers{};
81 bool uses_warps{}; 65 bool uses_warps{};
82}; 66};
83 67
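GlobalBufferEntry drops its constructor and accessors and becomes a plain aggregate, which is what enables the designated-initializer construction added to GenerateShaderEntries above; former GetCbufIndex()/GetCbufOffset()/IsWritten() callers now read the members directly. Illustrative values only:

    const GlobalBufferEntry entry{
        .cbuf_index = 0,
        .cbuf_offset = 0x120,
        .is_written = false,
    };
    // e.g. entry.cbuf_offset is read directly instead of entry.GetCbufOffset()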
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 97fd41cc1..dfd8c8e5a 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -8,6 +8,7 @@
8 8
9#include <fmt/format.h> 9#include <fmt/format.h>
10 10
11#include "common/alignment.h"
11#include "common/assert.h" 12#include "common/assert.h"
12#include "common/bit_util.h" 13#include "common/bit_util.h"
13#include "common/common_types.h" 14#include "common/common_types.h"
@@ -17,18 +18,119 @@
17#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
18 19
19namespace Vulkan { 20namespace Vulkan {
21namespace {
22// Maximum potential alignment of a Vulkan buffer
23constexpr VkDeviceSize MAX_ALIGNMENT = 256;
24// Maximum size to put elements in the stream buffer
25constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
26// Stream buffer size in bytes
27constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
28constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
29
30constexpr VkMemoryPropertyFlags HOST_FLAGS =
31 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
32constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
33
34bool IsStreamHeap(VkMemoryHeap heap) noexcept {
35 return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
36}
37
38std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
39 VkMemoryPropertyFlags flags) noexcept {
40 for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
41 if (((type_mask >> type_index) & 1) == 0) {
42 // Memory type is incompatible
43 continue;
44 }
45 const VkMemoryType& memory_type = props.memoryTypes[type_index];
46 if ((memory_type.propertyFlags & flags) != flags) {
47 // Memory type doesn't have the flags we want
48 continue;
49 }
50 if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
51 // Memory heap is not suitable for streaming
52 continue;
53 }
54 // Success!
55 return type_index;
56 }
57 return std::nullopt;
58}
59
60u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
61 // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
62 std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
63 if (type) {
64 return *type;
65 }
66 // Otherwise try without the DEVICE_LOCAL_BIT
67 type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
68 if (type) {
69 return *type;
70 }
71 // This should never happen, and in case it does, signal it as an out of memory situation
72 throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
73}
74
75size_t Region(size_t iterator) noexcept {
76 return iterator / REGION_SIZE;
77}
78} // Anonymous namespace
20 79
21StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, 80StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
22 VKScheduler& scheduler_) 81 VKScheduler& scheduler_)
23 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} 82 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
83 const vk::Device& dev = device.GetLogical();
84 stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
85 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
86 .pNext = nullptr,
87 .flags = 0,
88 .size = STREAM_BUFFER_SIZE,
89 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
90 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
91 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
92 .queueFamilyIndexCount = 0,
93 .pQueueFamilyIndices = nullptr,
94 });
95 if (device.HasDebuggingToolAttached()) {
96 stream_buffer.SetObjectNameEXT("Stream Buffer");
97 }
98 VkMemoryDedicatedRequirements dedicated_reqs{
99 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
100 .pNext = nullptr,
101 .prefersDedicatedAllocation = VK_FALSE,
102 .requiresDedicatedAllocation = VK_FALSE,
103 };
104 const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
105 const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
106 dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
107 const VkMemoryDedicatedAllocateInfo dedicated_info{
108 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
109 .pNext = nullptr,
110 .image = nullptr,
111 .buffer = *stream_buffer,
112 };
113 const auto memory_properties = device.GetPhysical().GetMemoryProperties();
114 stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
115 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
116 .pNext = make_dedicated ? &dedicated_info : nullptr,
117 .allocationSize = requirements.size,
118 .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
119 });
120 if (device.HasDebuggingToolAttached()) {
121 stream_memory.SetObjectNameEXT("Stream Buffer Memory");
122 }
123 stream_buffer.BindMemory(*stream_memory, 0);
124 stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
125}
24 126
25StagingBufferPool::~StagingBufferPool() = default; 127StagingBufferPool::~StagingBufferPool() = default;
26 128
27StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { 129StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
28 if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { 130 if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
29 return *ref; 131 return GetStreamBuffer(size);
30 } 132 }
31 return CreateStagingBuffer(size, usage); 133 return GetStagingBuffer(size, usage);
32} 134}
33 135
34void StagingBufferPool::TickFrame() { 136void StagingBufferPool::TickFrame() {
@@ -39,6 +141,51 @@ void StagingBufferPool::TickFrame() {
39 ReleaseCache(MemoryUsage::Download); 141 ReleaseCache(MemoryUsage::Download);
40} 142}
41 143
144StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
145 if (AreRegionsActive(Region(free_iterator) + 1,
146 std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
147 // Avoid waiting for the previous usages to be free
148 return GetStagingBuffer(size, MemoryUsage::Upload);
149 }
150 const u64 current_tick = scheduler.CurrentTick();
151 std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
152 current_tick);
153 used_iterator = iterator;
154 free_iterator = std::max(free_iterator, iterator + size);
155
156 if (iterator + size > STREAM_BUFFER_SIZE) {
157 std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
158 current_tick);
159 used_iterator = 0;
160 iterator = 0;
161 free_iterator = size;
162
163 if (AreRegionsActive(0, Region(size) + 1)) {
164 // Avoid waiting for the previous usages to be free
165 return GetStagingBuffer(size, MemoryUsage::Upload);
166 }
167 }
168 const size_t offset = iterator;
169 iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
170 return StagingBufferRef{
171 .buffer = *stream_buffer,
172 .offset = static_cast<VkDeviceSize>(offset),
173 .mapped_span = std::span<u8>(stream_pointer + offset, size),
174 };
175}
176
177bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
178 return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
179 [this](u64 sync_tick) { return !scheduler.IsFree(sync_tick); });
180};
181
182StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
183 if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
184 return *ref;
185 }
186 return CreateStagingBuffer(size, usage);
187}
188
42std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, 189std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
43 MemoryUsage usage) { 190 MemoryUsage usage) {
44 StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; 191 StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
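The pool now serves uploads of up to MAX_STREAM_BUFFER_REQUEST_SIZE (8 MiB) from a single persistently mapped 128 MiB stream buffer. That buffer is split into NUM_SYNCS regions; each region remembers the scheduler tick of the last submission that wrote through it, and an allocation only proceeds when every region it would overlap has already been signalled, otherwise the request falls back to a conventional staging buffer. A self-contained, simplified model of that bookkeeping (the real code also tracks used_iterator/free_iterator and aligns allocations to MAX_ALIGNMENT):

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <optional>

    // Simplified stream-buffer ring: NUM_SYNCS regions, each stamped with the tick of the
    // last submission that used it. An allocation succeeds only if every region it touches
    // has been completed by the GPU (gpu_tick), and stamps those regions with current_tick.
    class StreamRing {
    public:
        static constexpr std::size_t NUM_SYNCS = 16;
        static constexpr std::size_t SIZE = 128 * 1024 * 1024;
        static constexpr std::size_t REGION = SIZE / NUM_SYNCS;

        std::optional<std::size_t> Allocate(std::size_t size, std::uint64_t gpu_tick,
                                            std::uint64_t current_tick) {
            if (iterator + size > SIZE) {
                iterator = 0; // wrap around when the request no longer fits
            }
            const std::size_t first = iterator / REGION;
            const std::size_t last = std::min((iterator + size) / REGION + 1, NUM_SYNCS);
            for (std::size_t region = first; region < last; ++region) {
                if (sync_ticks[region] > gpu_tick) {
                    return std::nullopt; // region still in flight: use a staging buffer instead
                }
            }
            std::fill(sync_ticks.begin() + first, sync_ticks.begin() + last, current_tick);
            const std::size_t offset = iterator;
            iterator += size;
            return offset;
        }

    private:
        std::size_t iterator = 0;
        std::array<std::uint64_t, NUM_SYNCS> sync_ticks{};
    };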
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index d42918a47..69f7618de 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -19,11 +19,14 @@ class VKScheduler;
19 19
20struct StagingBufferRef { 20struct StagingBufferRef {
21 VkBuffer buffer; 21 VkBuffer buffer;
22 VkDeviceSize offset;
22 std::span<u8> mapped_span; 23 std::span<u8> mapped_span;
23}; 24};
24 25
25class StagingBufferPool { 26class StagingBufferPool {
26public: 27public:
28 static constexpr size_t NUM_SYNCS = 16;
29
27 explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, 30 explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
28 VKScheduler& scheduler); 31 VKScheduler& scheduler);
29 ~StagingBufferPool(); 32 ~StagingBufferPool();
@@ -33,6 +36,11 @@ public:
33 void TickFrame(); 36 void TickFrame();
34 37
35private: 38private:
39 struct StreamBufferCommit {
40 size_t upper_bound;
41 u64 tick;
42 };
43
36 struct StagingBuffer { 44 struct StagingBuffer {
37 vk::Buffer buffer; 45 vk::Buffer buffer;
38 MemoryCommit commit; 46 MemoryCommit commit;
@@ -42,6 +50,7 @@ private:
42 StagingBufferRef Ref() const noexcept { 50 StagingBufferRef Ref() const noexcept {
43 return { 51 return {
44 .buffer = *buffer, 52 .buffer = *buffer,
53 .offset = 0,
45 .mapped_span = mapped_span, 54 .mapped_span = mapped_span,
46 }; 55 };
47 } 56 }
@@ -56,6 +65,12 @@ private:
56 static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; 65 static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
57 using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; 66 using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
58 67
68 StagingBufferRef GetStreamBuffer(size_t size);
69
70 bool AreRegionsActive(size_t region_begin, size_t region_end) const;
71
72 StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
73
59 std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); 74 std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
60 75
61 StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); 76 StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
@@ -70,6 +85,15 @@ private:
70 MemoryAllocator& memory_allocator; 85 MemoryAllocator& memory_allocator;
71 VKScheduler& scheduler; 86 VKScheduler& scheduler;
72 87
88 vk::Buffer stream_buffer;
89 vk::DeviceMemory stream_memory;
90 u8* stream_pointer = nullptr;
91
92 size_t iterator = 0;
93 size_t used_iterator = 0;
94 size_t free_iterator = 0;
95 std::array<u64, NUM_SYNCS> sync_ticks{};
96
73 StagingBuffersCache device_local_cache; 97 StagingBuffersCache device_local_cache;
74 StagingBuffersCache upload_cache; 98 StagingBuffersCache upload_cache;
75 StagingBuffersCache download_cache; 99 StagingBuffersCache download_cache;
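StagingBufferRef gains an offset field because stream allocations are now sub-ranges of one shared buffer, while dedicated staging buffers keep offset = 0 in Ref(). A hedged sketch of how a caller is expected to use it; upload_size, cpu_data and dst_buffer are placeholders, and the actual copy is only indicated:

    #include <cstring>

    const StagingBufferRef ref = staging_pool.Request(upload_size, MemoryUsage::Upload);
    std::memcpy(ref.mapped_span.data(), cpu_data, upload_size); // write through the mapping
    const VkBufferCopy copy{
        .srcOffset = ref.offset, // offset into the shared stream buffer
        .dstOffset = 0,
        .size = upload_size,
    };
    // The copy from ref.buffer to dst_buffer is then recorded through the scheduler.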
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 1779a2e30..e81fad007 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table;
30using Flags = Maxwell3D::DirtyState::Flags; 30using Flags = Maxwell3D::DirtyState::Flags;
31 31
32Flags MakeInvalidationFlags() { 32Flags MakeInvalidationFlags() {
33 static constexpr std::array INVALIDATION_FLAGS{ 33 static constexpr int INVALIDATION_FLAGS[]{
34 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, 34 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
35 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, 35 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
36 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, 36 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
37 }; 37 };
38 Flags flags{}; 38 Flags flags{};
39 for (const int flag : INVALIDATION_FLAGS) { 39 for (const int flag : INVALIDATION_FLAGS) {
40 flags[flag] = true; 40 flags[flag] = true;
41 } 41 }
42 for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
43 flags[index] = true;
44 }
42 return flags; 45 return flags;
43} 46}
44 47
@@ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
130StateTracker::StateTracker(Tegra::GPU& gpu) 133StateTracker::StateTracker(Tegra::GPU& gpu)
131 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { 134 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
132 auto& tables = gpu.Maxwell3D().dirty.tables; 135 auto& tables = gpu.Maxwell3D().dirty.tables;
133 SetupDirtyRenderTargets(tables); 136 SetupDirtyFlags(tables);
134 SetupDirtyViewports(tables); 137 SetupDirtyViewports(tables);
135 SetupDirtyScissors(tables); 138 SetupDirtyScissors(tables);
136 SetupDirtyDepthBias(tables); 139 SetupDirtyDepthBias(tables);
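The invalidation mask now also covers vertex buffers: the aggregate VertexBuffers flag joins the list, and the 32 per-binding flags (VertexBuffer0 through VertexBuffer31, consecutive enumerators) are set with a range loop rather than spelled out. Whatever consumes these flags re-records the corresponding bindings on the next draw; a hypothetical consumer, for illustration only:

    // Hypothetical consumer: the real re-binding happens inside the new buffer cache.
    if (flags[VertexBuffers]) {
        flags[VertexBuffers] = false;
        RebindAllVertexBuffers(); // placeholder for the actual re-bind path
    }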
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 725a2a05d..0b63bd6c8 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
56 56
57} // Anonymous namespace 57} // Anonymous namespace
58 58
59VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_) 59VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_,
60 : surface{surface_}, device{device_}, scheduler{scheduler_} {} 60 u32 width, u32 height, bool srgb)
61 : surface{surface_}, device{device_}, scheduler{scheduler_} {
62 Create(width, height, srgb);
63}
61 64
62VKSwapchain::~VKSwapchain() = default; 65VKSwapchain::~VKSwapchain() = default;
63 66
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 2eadd62b3..a728511e0 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -20,7 +20,8 @@ class VKScheduler;
20 20
21class VKSwapchain { 21class VKSwapchain {
22public: 22public:
23 explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler); 23 explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler,
24 u32 width, u32 height, bool srgb);
24 ~VKSwapchain(); 25 ~VKSwapchain();
25 26
26 /// Creates (or recreates) the swapchain with a given size. 27 /// Creates (or recreates) the swapchain with a given size.
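VKSwapchain now calls Create() from its constructor, so a freshly constructed swapchain is immediately usable and Create() only needs to be called again on resizes or sRGB changes. A hypothetical call site (width, height and srgb are assumed values):

    auto swapchain =
        std::make_unique<VKSwapchain>(*surface, device, scheduler, width, height, srgb);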
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index aa7c5d7c6..22a1014a9 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -426,46 +426,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
426void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, 426void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
427 VkImageAspectFlags aspect_mask, bool is_initialized, 427 VkImageAspectFlags aspect_mask, bool is_initialized,
428 std::span<const VkBufferImageCopy> copies) { 428 std::span<const VkBufferImageCopy> copies) {
429 static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | 429 static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
430 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 430 VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
431 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; 431 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
432 static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
433 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
434 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
432 const VkImageMemoryBarrier read_barrier{ 435 const VkImageMemoryBarrier read_barrier{
433 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 436 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
434 .pNext = nullptr, 437 .pNext = nullptr,
435 .srcAccessMask = ACCESS_FLAGS, 438 .srcAccessMask = WRITE_ACCESS_FLAGS,
436 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 439 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
437 .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, 440 .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
438 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 441 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
439 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 442 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
440 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 443 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
441 .image = image, 444 .image = image,
442 .subresourceRange = 445 .subresourceRange{
443 { 446 .aspectMask = aspect_mask,
444 .aspectMask = aspect_mask, 447 .baseMipLevel = 0,
445 .baseMipLevel = 0, 448 .levelCount = VK_REMAINING_MIP_LEVELS,
446 .levelCount = VK_REMAINING_MIP_LEVELS, 449 .baseArrayLayer = 0,
447 .baseArrayLayer = 0, 450 .layerCount = VK_REMAINING_ARRAY_LAYERS,
448 .layerCount = VK_REMAINING_ARRAY_LAYERS, 451 },
449 },
450 }; 452 };
451 const VkImageMemoryBarrier write_barrier{ 453 const VkImageMemoryBarrier write_barrier{
452 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 454 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
453 .pNext = nullptr, 455 .pNext = nullptr,
454 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 456 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
455 .dstAccessMask = ACCESS_FLAGS, 457 .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
456 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 458 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
457 .newLayout = VK_IMAGE_LAYOUT_GENERAL, 459 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
458 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 460 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
459 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 461 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
460 .image = image, 462 .image = image,
461 .subresourceRange = 463 .subresourceRange{
462 { 464 .aspectMask = aspect_mask,
463 .aspectMask = aspect_mask, 465 .baseMipLevel = 0,
464 .baseMipLevel = 0, 466 .levelCount = VK_REMAINING_MIP_LEVELS,
465 .levelCount = VK_REMAINING_MIP_LEVELS, 467 .baseArrayLayer = 0,
466 .baseArrayLayer = 0, 468 .layerCount = VK_REMAINING_ARRAY_LAYERS,
467 .layerCount = VK_REMAINING_ARRAY_LAYERS, 469 },
468 },
469 }; 470 };
470 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 471 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
471 read_barrier); 472 read_barrier);
@@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() {
569 scheduler.Finish(); 570 scheduler.Finish();
570} 571}
571 572
572ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { 573StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
573 const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload); 574 return staging_buffer_pool.Request(size, MemoryUsage::Upload);
574 return {
575 .handle = staging_ref.buffer,
576 .span = staging_ref.mapped_span,
577 };
578} 575}
579 576
580ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { 577StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
581 const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download); 578 return staging_buffer_pool.Request(size, MemoryUsage::Download);
582 return {
583 .handle = staging_ref.buffer,
584 .span = staging_ref.mapped_span,
585 };
586} 579}
587 580
588void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, 581void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
@@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
754 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 747 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
755 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | 748 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
756 VK_ACCESS_TRANSFER_WRITE_BIT, 749 VK_ACCESS_TRANSFER_WRITE_BIT,
757 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 750 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
758 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, 751 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
759 .newLayout = VK_IMAGE_LAYOUT_GENERAL, 752 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
760 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 753 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
765 VkImageMemoryBarrier{ 758 VkImageMemoryBarrier{
766 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 759 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
767 .pNext = nullptr, 760 .pNext = nullptr,
768 .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | 761 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
769 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
770 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
771 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
772 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | 762 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
773 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, 763 VK_ACCESS_TRANSFER_WRITE_BIT,
774 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 764 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
775 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, 765 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
776 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 766 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@@ -828,12 +818,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
828 } 818 }
829} 819}
830 820
831void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 821void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
832 std::span<const BufferImageCopy> copies) {
833 // TODO: Move this to another API 822 // TODO: Move this to another API
834 scheduler->RequestOutsideRenderPassOperationContext(); 823 scheduler->RequestOutsideRenderPassOperationContext();
835 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); 824 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
836 const VkBuffer src_buffer = map.handle; 825 const VkBuffer src_buffer = map.buffer;
837 const VkImage vk_image = *image; 826 const VkImage vk_image = *image;
838 const VkImageAspectFlags vk_aspect_mask = aspect_mask; 827 const VkImageAspectFlags vk_aspect_mask = aspect_mask;
839 const bool is_initialized = std::exchange(initialized, true); 828 const bool is_initialized = std::exchange(initialized, true);
@@ -843,12 +832,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
843 }); 832 });
844} 833}
845 834
846void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 835void Image::UploadMemory(const StagingBufferRef& map,
847 std::span<const VideoCommon::BufferCopy> copies) { 836 std::span<const VideoCommon::BufferCopy> copies) {
848 // TODO: Move this to another API 837 // TODO: Move this to another API
849 scheduler->RequestOutsideRenderPassOperationContext(); 838 scheduler->RequestOutsideRenderPassOperationContext();
850 std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); 839 std::vector vk_copies = TransformBufferCopies(copies, map.offset);
851 const VkBuffer src_buffer = map.handle; 840 const VkBuffer src_buffer = map.buffer;
852 const VkBuffer dst_buffer = *buffer; 841 const VkBuffer dst_buffer = *buffer;
853 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { 842 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
854 // TODO: Barriers 843 // TODO: Barriers
@@ -856,13 +845,57 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
856 }); 845 });
857} 846}
858 847
859void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, 848void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
860 std::span<const BufferImageCopy> copies) { 849 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
861 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); 850 scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
862 scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
863 vk_copies](vk::CommandBuffer cmdbuf) { 851 vk_copies](vk::CommandBuffer cmdbuf) {
864 // TODO: Barriers 852 const VkImageMemoryBarrier read_barrier{
865 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); 853 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
854 .pNext = nullptr,
855 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
856 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
857 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
858 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
859 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
860 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
861 .image = image,
862 .subresourceRange{
863 .aspectMask = aspect_mask,
864 .baseMipLevel = 0,
865 .levelCount = VK_REMAINING_MIP_LEVELS,
866 .baseArrayLayer = 0,
867 .layerCount = VK_REMAINING_ARRAY_LAYERS,
868 },
869 };
870 const VkImageMemoryBarrier image_write_barrier{
871 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
872 .pNext = nullptr,
873 .srcAccessMask = 0,
874 .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
875 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
876 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
877 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
878 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
879 .image = image,
880 .subresourceRange{
881 .aspectMask = aspect_mask,
882 .baseMipLevel = 0,
883 .levelCount = VK_REMAINING_MIP_LEVELS,
884 .baseArrayLayer = 0,
885 .layerCount = VK_REMAINING_ARRAY_LAYERS,
886 },
887 };
888 const VkMemoryBarrier memory_write_barrier{
889 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
890 .pNext = nullptr,
891 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
892 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
893 };
894 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
895 0, read_barrier);
896 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
897 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
898 0, memory_write_barrier, nullptr, image_write_barrier);
866 }); 899 });
867} 900}
868 901
@@ -1127,7 +1160,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1127 .pAttachments = attachments.data(), 1160 .pAttachments = attachments.data(),
1128 .width = key.size.width, 1161 .width = key.size.width,
1129 .height = key.size.height, 1162 .height = key.size.height,
1130 .layers = static_cast<u32>(num_layers), 1163 .layers = static_cast<u32>(std::max(num_layers, 1)),
1131 }); 1164 });
1132 if (runtime.device.HasDebuggingToolAttached()) { 1165 if (runtime.device.HasDebuggingToolAttached()) {
1133 framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); 1166 framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
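DownloadMemory now brackets the copy with explicit barriers: the image is transitioned to TRANSFER_SRC_OPTIMAL for the CopyImageToBuffer, transitioned back to GENERAL afterwards, and a trailing VkMemoryBarrier makes the buffer writes visible to later reads. The caller still has to wait on the submission before touching the mapped staging memory; a hedged sketch of that host-side handshake using the runtime API from this patch (copies, download_size and cpu_destination are placeholders):

    #include <cstring>

    const StagingBufferRef map = runtime.DownloadStagingBuffer(download_size);
    image.DownloadMemory(map, copies);  // records the barriers and CopyImageToBuffer above
    runtime.Finish();                   // wait for the GPU; a tick-based wait would also work
    std::memcpy(cpu_destination, map.mapped_span.data(), download_size);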
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 8d29361a1..b08c23459 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
7#include <compare> 7#include <compare>
8#include <span> 8#include <span>
9 9
10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
10#include "video_core/texture_cache/texture_cache.h" 11#include "video_core/texture_cache/texture_cache.h"
11#include "video_core/vulkan_common/vulkan_memory_allocator.h" 12#include "video_core/vulkan_common/vulkan_memory_allocator.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> {
53 54
54namespace Vulkan { 55namespace Vulkan {
55 56
56struct ImageBufferMap {
57 [[nodiscard]] VkBuffer Handle() const noexcept {
58 return handle;
59 }
60
61 [[nodiscard]] std::span<u8> Span() const noexcept {
62 return span;
63 }
64
65 VkBuffer handle;
66 std::span<u8> span;
67};
68
69struct TextureCacheRuntime { 57struct TextureCacheRuntime {
70 const Device& device; 58 const Device& device;
71 VKScheduler& scheduler; 59 VKScheduler& scheduler;
@@ -76,9 +64,9 @@ struct TextureCacheRuntime {
76 64
77 void Finish(); 65 void Finish();
78 66
79 [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); 67 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
80 68
81 [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size); 69 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
82 70
83 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, 71 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
84 const std::array<Offset2D, 2>& dst_region, 72 const std::array<Offset2D, 2>& dst_region,
@@ -94,7 +82,7 @@ struct TextureCacheRuntime {
94 return false; 82 return false;
95 } 83 }
96 84
97 void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, 85 void AccelerateImageUpload(Image&, const StagingBufferRef&,
98 std::span<const VideoCommon::SwizzleParameters>) { 86 std::span<const VideoCommon::SwizzleParameters>) {
99 UNREACHABLE(); 87 UNREACHABLE();
100 } 88 }
@@ -112,13 +100,12 @@ public:
112 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, 100 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
113 VAddr cpu_addr); 101 VAddr cpu_addr);
114 102
115 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 103 void UploadMemory(const StagingBufferRef& map,
116 std::span<const VideoCommon::BufferImageCopy> copies); 104 std::span<const VideoCommon::BufferImageCopy> copies);
117 105
118 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 106 void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
119 std::span<const VideoCommon::BufferCopy> copies);
120 107
121 void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, 108 void DownloadMemory(const StagingBufferRef& map,
122 std::span<const VideoCommon::BufferImageCopy> copies); 109 std::span<const VideoCommon::BufferImageCopy> copies);
123 110
124 [[nodiscard]] VkImage Handle() const noexcept { 111 [[nodiscard]] VkImage Handle() const noexcept {